From 58e69a030695500a2b38f0e09250f0c5e04e4c05 Mon Sep 17 00:00:00 2001 From: Ying-Jer Kao Date: Thu, 26 Sep 2024 13:06:01 +0900 Subject: [PATCH 1/6] Replace magma with cusolver LU --- .../linalg_internal_gpu/cuDet_internal.cu | 180 +++++++++++------- 1 file changed, 108 insertions(+), 72 deletions(-) diff --git a/src/backend/linalg_internal_gpu/cuDet_internal.cu b/src/backend/linalg_internal_gpu/cuDet_internal.cu index fee31c49..7cf02265 100644 --- a/src/backend/linalg_internal_gpu/cuDet_internal.cu +++ b/src/backend/linalg_internal_gpu/cuDet_internal.cu @@ -5,152 +5,188 @@ #include "../utils_internal_gpu/cuAlloc_gpu.hpp" -#ifdef UNI_MAGMA - #include "magma_v2.h" -#endif - namespace cytnx { namespace linalg_internal { void cuDet_internal_cd(void* out, const boost::intrusive_ptr& in, const cytnx_uint64& L) { -#ifdef UNI_MAGMA cytnx_complex128* od = (cytnx_complex128*)out; // result on cpu! cuDoubleComplex* _in = (cuDoubleComplex*)utils_internal::cuMalloc_gpu( in->len * sizeof(cuDoubleComplex)); // unify mem. checkCudaErrors( cudaMemcpy(_in, in->Mem, sizeof(cytnx_complex128) * in->len, cudaMemcpyDeviceToDevice)); - magma_int_t* ipiv; - magma_imalloc_cpu(&ipiv, L + 1); - magma_int_t N = L; - magma_int_t info; - magma_zgetrf_gpu(N, N, _in, N, ipiv, &info); - cytnx_error_msg(info != 0, "[ERROR] magma_zgetrf_gpu fail with info= %d\n", info); + cusolverDnHandle_t cusolverH; + cusolverDnCreate(&cusolverH); + + int* devIpiv; + int* devInfo; + checkCudaErrors(cudaMalloc((void**)&devIpiv, L * sizeof(int))); + checkCudaErrors(cudaMalloc((void**)&devInfo, sizeof(int))); + + int workspace_size = 0; + cuDoubleComplex* workspace = NULL; + cusolverDnZgetrf_bufferSize(cusolverH, L, L, _in, L, &workspace_size); + checkCudaErrors(cudaMalloc((void**)&workspace, workspace_size * sizeof(cuDoubleComplex))); + + cusolverDnZgetrf(cusolverH, L, L, _in, L, workspace, devIpiv, devInfo); + + int info; + checkCudaErrors(cudaMemcpy(&info, devInfo, sizeof(int), cudaMemcpyDeviceToHost)); + cytnx_error_msg(info != 0, "[ERROR] cusolverDnZgetrf fail with info= %d\n", info); // since we do unify mem, direct access element is possible: od[0] = 1; bool neg = 0; - for (magma_int_t i = 0; i < N; i++) { - od[0] *= ((cytnx_complex128*)_in)[i * N + i]; + int* ipiv = new int[L]; + checkCudaErrors(cudaMemcpy(ipiv, devIpiv, L * sizeof(int), cudaMemcpyDeviceToHost)); + for (int i = 0; i < L; i++) { + od[0] *= ((cytnx_complex128*)_in)[i * L + i]; if (ipiv[i] != (i + 1)) neg = !neg; } - magma_free_cpu(ipiv); + delete[] ipiv; + cudaFree(devIpiv); + cudaFree(devInfo); + cudaFree(workspace); cudaFree(_in); + cusolverDnDestroy(cusolverH); if (neg) od[0] *= -1; - -#else - cytnx_error_msg(true, - "[ERROR][internal Det] Det for Tensor on GPU require magma. please " - "re-compiling cytnx with magma.%s", - "\n"); -#endif } void cuDet_internal_cf(void* out, const boost::intrusive_ptr& in, const cytnx_uint64& L) { -#ifdef UNI_MAGMA cytnx_complex64* od = (cytnx_complex64*)out; // result on cpu! cuFloatComplex* _in = (cuFloatComplex*)utils_internal::cuMalloc_gpu( in->len * sizeof(cuFloatComplex)); // unify mem. 
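          // Note (applies to all four dtype variants in this file): the input matrix is copied
          // into unified memory, LU-factorized in place with cusolverDn<T>getrf, and the result
          // is then read on the host as det(A) = (-1)^{#row swaps} * prod_i U(i,i), counting a
          // swap whenever the 1-based pivot ipiv[i] differs from i+1.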
checkCudaErrors( cudaMemcpy(_in, in->Mem, sizeof(cytnx_complex64) * in->len, cudaMemcpyDeviceToDevice)); - magma_int_t* ipiv; - magma_imalloc_cpu(&ipiv, L + 1); - magma_int_t N = L; - magma_int_t info; - magma_cgetrf_gpu(N, N, _in, N, ipiv, &info); - cytnx_error_msg(info != 0, "[ERROR] magma_cgetrf_gpu fail with info= %d\n", info); + cusolverDnHandle_t cusolverH; + cusolverDnCreate(&cusolverH); + + int* devIpiv; + int* devInfo; + checkCudaErrors(cudaMalloc((void**)&devIpiv, L * sizeof(int))); + checkCudaErrors(cudaMalloc((void**)&devInfo, sizeof(int))); + + int workspace_size = 0; + cuFloatComplex* workspace = NULL; + cusolverDnCgetrf_bufferSize(cusolverH, L, L, _in, L, &workspace_size); + checkCudaErrors(cudaMalloc((void**)&workspace, workspace_size * sizeof(cuFloatComplex))); + + cusolverDnCgetrf(cusolverH, L, L, _in, L, workspace, devIpiv, devInfo); + + int info; + checkCudaErrors(cudaMemcpy(&info, devInfo, sizeof(int), cudaMemcpyDeviceToHost)); + cytnx_error_msg(info != 0, "[ERROR] cusolverDnCgetrf fail with info= %d\n", info); // since we do unify mem, direct access element is possible: od[0] = 1; bool neg = 0; - for (magma_int_t i = 0; i < N; i++) { - od[0] *= ((cytnx_complex64*)_in)[i * N + i]; + int* ipiv = new int[L]; + checkCudaErrors(cudaMemcpy(ipiv, devIpiv, L * sizeof(int), cudaMemcpyDeviceToHost)); + for (int i = 0; i < L; i++) { + od[0] *= ((cytnx_complex64*)_in)[i * L + i]; if (ipiv[i] != (i + 1)) neg = !neg; } - magma_free_cpu(ipiv); + delete[] ipiv; + cudaFree(devIpiv); + cudaFree(devInfo); + cudaFree(workspace); cudaFree(_in); + cusolverDnDestroy(cusolverH); if (neg) od[0] *= -1; - -#else - cytnx_error_msg(true, - "[ERROR][internal Det] Det for Tensor on GPU require magma. please " - "re-compiling cytnx with magma.%s", - "\n"); -#endif } void cuDet_internal_d(void* out, const boost::intrusive_ptr& in, const cytnx_uint64& L) { -#ifdef UNI_MAGMA cytnx_double* od = (cytnx_double*)out; // result on cpu! cytnx_double* _in = (cytnx_double*)utils_internal::cuMalloc_gpu(in->len * sizeof(cytnx_double)); // unify mem. 
checkCudaErrors( cudaMemcpy(_in, in->Mem, sizeof(cytnx_double) * in->len, cudaMemcpyDeviceToDevice)); - magma_int_t* ipiv; - magma_imalloc_cpu(&ipiv, L + 1); - magma_int_t N = L; - magma_int_t info; - magma_dgetrf_gpu(N, N, _in, N, ipiv, &info); - cytnx_error_msg(info != 0, "[ERROR] magma_dgetrf_gpu fail with info= %d\n", info); + cusolverDnHandle_t cusolverH; + cusolverDnCreate(&cusolverH); + + int* devIpiv; + int* devInfo; + checkCudaErrors(cudaMalloc((void**)&devIpiv, L * sizeof(int))); + checkCudaErrors(cudaMalloc((void**)&devInfo, sizeof(int))); + + int workspace_size = 0; + cytnx_double* workspace = NULL; + cusolverDnDgetrf_bufferSize(cusolverH, L, L, _in, L, &workspace_size); + checkCudaErrors(cudaMalloc((void**)&workspace, workspace_size * sizeof(cytnx_double))); + + cusolverDnDgetrf(cusolverH, L, L, _in, L, workspace, devIpiv, devInfo); + + int info; + checkCudaErrors(cudaMemcpy(&info, devInfo, sizeof(int), cudaMemcpyDeviceToHost)); + cytnx_error_msg(info != 0, "[ERROR] cusolverDnDgetrf fail with info= %d\n", info); // since we do unify mem, direct access element is possible: od[0] = 1; bool neg = 0; - for (magma_int_t i = 0; i < N; i++) { - od[0] *= _in[i * N + i]; + int* ipiv = new int[L]; + checkCudaErrors(cudaMemcpy(ipiv, devIpiv, L * sizeof(int), cudaMemcpyDeviceToHost)); + for (int i = 0; i < L; i++) { + od[0] *= _in[i * L + i]; if (ipiv[i] != (i + 1)) neg = !neg; } - magma_free_cpu(ipiv); + delete[] ipiv; + cudaFree(devIpiv); + cudaFree(devInfo); + cudaFree(workspace); cudaFree(_in); + cusolverDnDestroy(cusolverH); if (neg) od[0] *= -1; - -#else - cytnx_error_msg(true, - "[ERROR][internal Det] Det for Tensor on GPU require magma. please " - "re-compiling cytnx with magma.%s", - "\n"); -#endif } void cuDet_internal_f(void* out, const boost::intrusive_ptr& in, const cytnx_uint64& L) { -#ifdef UNI_MAGMA cytnx_float* od = (cytnx_float*)out; // result on cpu! cytnx_float* _in = (cytnx_float*)utils_internal::cuMalloc_gpu(in->len * sizeof(cytnx_float)); // unify mem. 
checkCudaErrors( cudaMemcpy(_in, in->Mem, sizeof(cytnx_float) * in->len, cudaMemcpyDeviceToDevice)); - magma_int_t* ipiv; - magma_imalloc_cpu(&ipiv, L + 1); - magma_int_t N = L; - magma_int_t info; - magma_sgetrf_gpu(N, N, _in, N, ipiv, &info); - cytnx_error_msg(info != 0, "[ERROR] magma_sgetrf_gpu fail with info= %d\n", info); + cusolverDnHandle_t cusolverH; + cusolverDnCreate(&cusolverH); + + int* devIpiv; + int* devInfo; + checkCudaErrors(cudaMalloc((void**)&devIpiv, L * sizeof(int))); + checkCudaErrors(cudaMalloc((void**)&devInfo, sizeof(int))); + + int workspace_size = 0; + cytnx_float* workspace = NULL; + cusolverDnSgetrf_bufferSize(cusolverH, L, L, _in, L, &workspace_size); + checkCudaErrors(cudaMalloc((void**)&workspace, workspace_size * sizeof(cytnx_float))); + + cusolverDnSgetrf(cusolverH, L, L, _in, L, workspace, devIpiv, devInfo); + + int info; + checkCudaErrors(cudaMemcpy(&info, devInfo, sizeof(int), cudaMemcpyDeviceToHost)); + cytnx_error_msg(info != 0, "[ERROR] cusolverDnSgetrf fail with info= %d\n", info); // since we do unify mem, direct access element is possible: od[0] = 1; bool neg = 0; - for (magma_int_t i = 0; i < N; i++) { - od[0] *= _in[i * N + i]; + int* ipiv = new int[L]; + checkCudaErrors(cudaMemcpy(ipiv, devIpiv, L * sizeof(int), cudaMemcpyDeviceToHost)); + for (int i = 0; i < L; i++) { + od[0] *= _in[i * L + i]; if (ipiv[i] != (i + 1)) neg = !neg; } - magma_free_cpu(ipiv); + delete[] ipiv; + cudaFree(devIpiv); + cudaFree(devInfo); + cudaFree(workspace); cudaFree(_in); + cusolverDnDestroy(cusolverH); if (neg) od[0] *= -1; - -#else - cytnx_error_msg(true, - "[ERROR][internal Det] Det for Tensor on GPU require magma. please " - "re-compiling cytnx with magma.%s", - "\n"); -#endif } } // namespace linalg_internal From 63430180d19472ebc3e00b64d795cc7084dc8e34 Mon Sep 17 00:00:00 2001 From: Ying-Jer Kao Date: Thu, 26 Sep 2024 13:16:44 +0900 Subject: [PATCH 2/6] Remove Magma Dependency --- cmake/Modules/FindCUQUANTUM.cmake | 4 ++-- src/Device.cpp | 16 +--------------- src/backend/linalg_internal_interface.cpp | 20 +------------------- 3 files changed, 4 insertions(+), 36 deletions(-) diff --git a/cmake/Modules/FindCUQUANTUM.cmake b/cmake/Modules/FindCUQUANTUM.cmake index 9df2b081..4f17d0b3 100644 --- a/cmake/Modules/FindCUQUANTUM.cmake +++ b/cmake/Modules/FindCUQUANTUM.cmake @@ -9,10 +9,10 @@ # CUQUANTUM_INCLUDE_DIRS ... cutensor include directory # CUQUANTUM_LIBRARIES ... cutensor libraries # -# MAGMA_ROOT this is required to set! +# CUQUANTUM_ROOT this is required to set! 
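# For example (illustrative path only): cmake -DCUQUANTUM_ROOT=/opt/nvidia/cuquantum ..
# or export CUQUANTUM_ROOT=/opt/nvidia/cuquantum before running cmake.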
# -#If environment variable MAGMA_ROOT is specified, it has same effect as MAGMA_ROOT +#If environment variable CUQUANTUM_ROOT is specified, it has same effect as CUQUANTUM_ROOT if(NOT DEFINED ENV{CUQUANTUM_ROOT} AND NOT DEFINED CUQUANTUM_ROOT) message(FATAL_ERROR "CUQUANTUM_ROOT not set!") diff --git a/src/Device.cpp b/src/Device.cpp index f7d3b176..8fa2c5b3 100644 --- a/src/Device.cpp +++ b/src/Device.cpp @@ -4,10 +4,6 @@ #include #endif -#ifdef UNI_MAGMA - #include "magma_v2.h" -#endif - using namespace std; namespace cytnx { @@ -43,12 +39,6 @@ namespace cytnx { } } - // #ifdef UNI_MAGMA - // int magma_status = magma_init(); - // cytnx_error_msg(magma_status!=MAGMA_SUCCESS,"[ERROR] magma system cannot - // initialize!%s","\n"); - // #endif - #endif #ifdef UNI_OMP @@ -62,11 +52,7 @@ namespace cytnx { }; Device_class::~Device_class(){ - // #ifdef UNI_MAGMA - // int magma_status = magma_finalize(); - // cytnx_error_msg(magma_status!=MAGMA_SUCCESS,"[ERROR] magma system cannot - // finalize!%s","\n"); - // #endif + }; string Device_class::getname(const int& device_id) { diff --git a/src/backend/linalg_internal_interface.cpp b/src/backend/linalg_internal_interface.cpp index 0ef1e356..47f4e1e1 100644 --- a/src/backend/linalg_internal_interface.cpp +++ b/src/backend/linalg_internal_interface.cpp @@ -2,9 +2,6 @@ #ifdef UNI_MKL #include #endif -#ifdef UNI_MAGMA - #include "magma_v2.h" -#endif using namespace std; @@ -29,27 +26,12 @@ namespace cytnx { #endif return code; } - linalg_internal_interface::~linalg_internal_interface() { -#ifdef UNI_GPU - #ifdef UNI_MAGMA - int magma_status = magma_finalize(); - cytnx_error_msg(magma_status != MAGMA_SUCCESS, "[ERROR] magma system cannot finalize!%s", - "\n"); - #endif -#endif - } + linalg_internal_interface::~linalg_internal_interface() {} linalg_internal_interface::linalg_internal_interface() { mkl_code = -1; #ifdef UNI_MKL this->set_mkl_ilp64(); #endif -#ifdef UNI_GPU - #ifdef UNI_MAGMA - int magma_status = magma_init(); - cytnx_error_msg(magma_status != MAGMA_SUCCESS, "[ERROR] magma system cannot initialize!%s", - "\n"); - #endif -#endif Ari_ii = vector>(N_Type, vector(N_Type, NULL)); From fe24972c2c9b7ce52f311b09432b69ed7c4d0ec6 Mon Sep 17 00:00:00 2001 From: Ying-Jer Kao Date: Thu, 26 Sep 2024 13:17:39 +0900 Subject: [PATCH 3/6] Implement Eig using cusolver --- .../linalg_internal_gpu/cuEig_internal.cu | 262 +++++++++--------- 1 file changed, 132 insertions(+), 130 deletions(-) diff --git a/src/backend/linalg_internal_gpu/cuEig_internal.cu b/src/backend/linalg_internal_gpu/cuEig_internal.cu index 3fc65bd4..35442cf9 100644 --- a/src/backend/linalg_internal_gpu/cuEig_internal.cu +++ b/src/backend/linalg_internal_gpu/cuEig_internal.cu @@ -5,10 +5,6 @@ #include "cuAlloc_gpu.hpp" -#ifdef UNI_MAGMA - #include "magma_v2.h" -#endif - namespace cytnx { namespace linalg_internal { @@ -17,155 +13,161 @@ namespace cytnx { void cuEig_internal_cd(const boost::intrusive_ptr &in, boost::intrusive_ptr &e, boost::intrusive_ptr &v, const cytnx_int64 &L) { - /* - char jobs = 'N'; - - cytnx_complex128 *tA; - cytnx_complex128 *buffer_A = - (cytnx_complex128 *)malloc(cytnx_uint64(L) * L * sizeof(cytnx_complex128)); - memcpy(buffer_A, in->Mem, sizeof(cytnx_complex128) * cytnx_uint64(L) * L); + cusolverDnHandle_t cusolverH = NULL; + cudaStream_t stream = NULL; + cusolverDnCreate(&cusolverH); + cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); + cusolverDnSetStream(cusolverH, stream); + + cuDoubleComplex *d_A, *d_W, *d_V; + cudaMalloc((void **)&d_A, 
sizeof(cuDoubleComplex) * L * L); + cudaMalloc((void **)&d_W, sizeof(cuDoubleComplex) * L); + cudaMemcpy(d_A, in->Mem, sizeof(cuDoubleComplex) * L * L, cudaMemcpyHostToDevice); + + int lwork = 0; + cusolverDnZgeev_bufferSize(cusolverH, CUSOLVER_EIG_MODE_VECTOR, L, d_A, L, d_W, d_V, L, NULL, + 1, &lwork); + cuDoubleComplex *d_work; + cudaMalloc((void **)&d_work, sizeof(cuDoubleComplex) * lwork); + + int *devInfo; + cudaMalloc((void **)&devInfo, sizeof(int)); + + cusolverDnZgeev(cusolverH, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_OP_N, L, d_A, L, d_W, d_V, L, + NULL, 1, d_work, lwork, devInfo); + + cudaMemcpy(e->Mem, d_W, sizeof(cuDoubleComplex) * L, cudaMemcpyDeviceToHost); if (v->dtype != Type.Void) { - tA = (cytnx_complex128 *)v->Mem; - jobs = 'V'; + cudaMemcpy(v->Mem, d_V, sizeof(cuDoubleComplex) * L * L, cudaMemcpyDeviceToHost); } - lapack_int ldA = L; - lapack_int info; - lapack_int ONE = 1; - - info = LAPACKE_zgeev(LAPACK_COL_MAJOR, jobs, 'N', L, (lapack_complex_double *)buffer_A, ldA, - (lapack_complex_double *)e->Mem, (lapack_complex_double *)tA, L, nullptr, - ONE); - - cytnx_error_msg(info != 0, "%s %d", "Error in Lapack function 'zgeev': Lapack INFO = ", info); - - free(buffer_A); - */ + cudaFree(d_A); + cudaFree(d_W); + cudaFree(d_V); + cudaFree(d_work); + cudaFree(devInfo); + cusolverDnDestroy(cusolverH); + cudaStreamDestroy(stream); } + void cuEig_internal_cf(const boost::intrusive_ptr &in, boost::intrusive_ptr &e, boost::intrusive_ptr &v, const cytnx_int64 &L) { - /* - char jobs = 'N'; - - cytnx_complex64 *tA; - cytnx_complex64 *buffer_A = - (cytnx_complex64 *)malloc(cytnx_uint64(L) * L * sizeof(cytnx_complex64)); - memcpy(buffer_A, in->Mem, sizeof(cytnx_complex64) * cytnx_uint64(L) * L); + cusolverDnHandle_t cusolverH = NULL; + cudaStream_t stream = NULL; + cusolverDnCreate(&cusolverH); + cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); + cusolverDnSetStream(cusolverH, stream); + + cuFloatComplex *d_A, *d_W, *d_V; + cudaMalloc((void **)&d_A, sizeof(cuFloatComplex) * L * L); + cudaMalloc((void **)&d_W, sizeof(cuFloatComplex) * L); + cudaMemcpy(d_A, in->Mem, sizeof(cuFloatComplex) * L * L, cudaMemcpyHostToDevice); + + int lwork = 0; + cusolverDnCgeev_bufferSize(cusolverH, CUSOLVER_EIG_MODE_VECTOR, L, d_A, L, d_W, d_V, L, NULL, + 1, &lwork); + cuFloatComplex *d_work; + cudaMalloc((void **)&d_work, sizeof(cuFloatComplex) * lwork); + + int *devInfo; + cudaMalloc((void **)&devInfo, sizeof(int)); + + cusolverDnCgeev(cusolverH, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_OP_N, L, d_A, L, d_W, d_V, L, + NULL, 1, d_work, lwork, devInfo); + + cudaMemcpy(e->Mem, d_W, sizeof(cuFloatComplex) * L, cudaMemcpyDeviceToHost); if (v->dtype != Type.Void) { - tA = (cytnx_complex64 *)v->Mem; - jobs = 'V'; + cudaMemcpy(v->Mem, d_V, sizeof(cuFloatComplex) * L * L, cudaMemcpyDeviceToHost); } - lapack_int ldA = L; - lapack_int info; - lapack_int ONE = 1; - - info = - LAPACKE_cgeev(LAPACK_COL_MAJOR, jobs, 'N', L, (lapack_complex_float *)buffer_A, ldA, - (lapack_complex_float *)e->Mem, (lapack_complex_float *)tA, L, nullptr, ONE); - - cytnx_error_msg(info != 0, "%s %d", "Error in Lapack function 'cgeev': Lapack INFO = ", info); - - free(buffer_A); - */ + cudaFree(d_A); + cudaFree(d_W); + cudaFree(d_V); + cudaFree(d_work); + cudaFree(devInfo); + cusolverDnDestroy(cusolverH); + cudaStreamDestroy(stream); } void cuEig_internal_d(const boost::intrusive_ptr &in, boost::intrusive_ptr &e, boost::intrusive_ptr &v, const cytnx_int64 &L) { - /* - char jobs = 'N'; - - cytnx_double *tA; - cytnx_double *buffer_A = 
(cytnx_double*)malloc(cytnx_uint64(L)*L*sizeof(cytnx_double)); - memcpy(buffer_A,in->Mem,sizeof(cytnx_double)*cytnx_uint64(L)*L); - cytnx_double *e_real = (cytnx_double*)malloc(cytnx_uint64(L)*sizeof(cytnx_double)); - cytnx_double *e_imag = (cytnx_double*)malloc(cytnx_uint64(L)*sizeof(cytnx_double)); - - if(v->dtype!=Type.Void){ - tA = (cytnx_double*)v->Mem; - jobs = 'V'; + cusolverDnHandle_t cusolverH = NULL; + cudaStream_t stream = NULL; + cusolverDnCreate(&cusolverH); + cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); + cusolverDnSetStream(cusolverH, stream); + + double *d_A, *d_W, *d_V; + cudaMalloc((void **)&d_A, sizeof(double) * L * L); + cudaMalloc((void **)&d_W, sizeof(double) * L); + cudaMemcpy(d_A, in->Mem, sizeof(double) * L * L, cudaMemcpyHostToDevice); + + int lwork = 0; + cusolverDnDgeev_bufferSize(cusolverH, CUSOLVER_EIG_MODE_VECTOR, L, d_A, L, d_W, d_V, L, NULL, + 1, &lwork); + double *d_work; + cudaMalloc((void **)&d_work, sizeof(double) * lwork); + + int *devInfo; + cudaMalloc((void **)&devInfo, sizeof(int)); + + cusolverDnDgeev(cusolverH, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_OP_N, L, d_A, L, d_W, d_V, L, + NULL, 1, d_work, lwork, devInfo); + + cudaMemcpy(e->Mem, d_W, sizeof(double) * L, cudaMemcpyDeviceToHost); + if (v->dtype != Type.Void) { + cudaMemcpy(v->Mem, d_V, sizeof(double) * L * L, cudaMemcpyDeviceToHost); } - - lapack_int ldA = L; - lapack_int lwork = -1; - cytnx_double *rwork = (cytnx_double*)malloc(sizeof(cytnx_double)*2*L); - cytnx_double workspace = 0; - lapack_int info; - lapack_int ONE = 1; - lapack_int TWO = 2; - /// query lwork - dgeev(&jobs, (char*)"N", &L, buffer_A, &ldA, e_real, e_imag, tA, &L,nullptr, &ONE,&workspace, - &lwork, &info); - - cytnx_error_msg(info != 0, "%s %d", "Error in Lapack function 'dgeev': Lapack INFO = ", info); - lwork = lapack_int(workspace); - cytnx_double* work= (cytnx_double*)malloc(sizeof(cytnx_double)*lwork); - dgeev(&jobs, (char*)"N", &L, buffer_A, &ldA, e_real, e_imag, tA, &L,nullptr, &ONE,work, - &lwork, &info); - - cytnx_error_msg(info != 0, "%s %d", "Error in Lapack function 'dgeev': Lapack INFO = ", info); - - // copy the real and imag to e output: - dcopy(L,e_real, &ONE, (cytnx_double*)e->Mem, &TWO); - dcopy(L,e_imag, &ONE, &(((cytnx_double*)e->Mem)[1]), &TWO); - - free(work); - free(buffer_A); - free(rwork); - free(e_real); - free(e_imag); - */ + cudaFree(d_A); + cudaFree(d_W); + cudaFree(d_V); + cudaFree(d_work); + cudaFree(devInfo); + cusolverDnDestroy(cusolverH); + cudaStreamDestroy(stream); } + void cuEig_internal_f(const boost::intrusive_ptr &in, boost::intrusive_ptr &e, boost::intrusive_ptr &v, const cytnx_int64 &L) { - /* - char jobs = 'N'; - - cytnx_float *tA; - cytnx_float *buffer_A = (cytnx_float*)malloc(cytnx_uint64(L)*L*sizeof(cytnx_float)); - memcpy(buffer_A,in->Mem,sizeof(cytnx_float)*cytnx_uint64(L)*L); - cytnx_float *e_real = (cytnx_float*)malloc(cytnx_uint64(L)*sizeof(cytnx_float)); - cytnx_float *e_imag = (cytnx_float*)malloc(cytnx_uint64(L)*sizeof(cytnx_float)); - - if(v->dtype!=Type.Void){ - tA = (cytnx_float*)v->Mem; - jobs = 'V'; + cusolverDnHandle_t cusolverH = NULL; + cudaStream_t stream = NULL; + cusolverDnCreate(&cusolverH); + cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); + cusolverDnSetStream(cusolverH, stream); + + float *d_A, *d_W, *d_V; + cudaMalloc((void **)&d_A, sizeof(float) * L * L); + cudaMalloc((void **)&d_W, sizeof(float) * L); + cudaMemcpy(d_A, in->Mem, sizeof(float) * L * L, cudaMemcpyHostToDevice); + + int lwork = 0; + cusolverDnSgeev_bufferSize(cusolverH, 
CUSOLVER_EIG_MODE_VECTOR, L, d_A, L, d_W, d_V, L, NULL, + 1, &lwork); + float *d_work; + cudaMalloc((void **)&d_work, sizeof(float) * lwork); + + int *devInfo; + cudaMalloc((void **)&devInfo, sizeof(int)); + + cusolverDnSgeev(cusolverH, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_OP_N, L, d_A, L, d_W, d_V, L, + NULL, 1, d_work, lwork, devInfo); + + cudaMemcpy(e->Mem, d_W, sizeof(float) * L, cudaMemcpyDeviceToHost); + if (v->dtype != Type.Void) { + cudaMemcpy(v->Mem, d_V, sizeof(float) * L * L, cudaMemcpyDeviceToHost); } - - lapack_int ldA = L; - lapack_int lwork = -1; - cytnx_float *rwork = (cytnx_float*)malloc(sizeof(cytnx_float)*2*L); - cytnx_float workspace = 0; - lapack_int info; - lapack_int ONE = 1; - lapack_int TWO = 2; - /// query lwork - sgeev(&jobs, (char*)"N", &L, buffer_A, &ldA, e_real, e_imag, tA, &L,nullptr, &ONE,&workspace, - &lwork, &info); - - cytnx_error_msg(info != 0, "%s %d", "Error in Lapack function 'sgeev': Lapack INFO = ", info); - lwork = lapack_int(workspace); - cytnx_float* work= (cytnx_float*)malloc(sizeof(cytnx_float)*lwork); - sgeev(&jobs, (char*)"N", &L, buffer_A, &ldA, e_real, e_imag, tA, &L,nullptr, &ONE,work, - &lwork, &info); - - cytnx_error_msg(info != 0, "%s %d", "Error in Lapack function 'sgeev': Lapack INFO = ", info); - - // copy the real and imag to e output: - scopy(L,e_real, &ONE, (cytnx_float*)e->Mem, &TWO); - scopy(L,e_imag, &ONE, &(((cytnx_float*)e->Mem)[1]), &TWO); - - free(work); - free(buffer_A); - free(rwork); - free(e_real); - free(e_imag); - */ + cudaFree(d_A); + cudaFree(d_W); + cudaFree(d_V); + cudaFree(d_work); + cudaFree(devInfo); + cusolverDnDestroy(cusolverH); + cudaStreamDestroy(stream); } } // namespace linalg_internal From 81dc6973b7634602b31b599e8cfc6c342e7142ea Mon Sep 17 00:00:00 2001 From: Ying-Jer Kao Date: Thu, 26 Sep 2024 13:19:10 +0900 Subject: [PATCH 4/6] Remove Magma dependency --- CMakeLists.txt | 1 - CytnxBKNDCMakeLists.cmake | 20 -------------------- 2 files changed, 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ed854be5..8cf0d85d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -85,7 +85,6 @@ option(BUILD_DOC "Build API docuemntation" OFF) option(USE_HPTT "Build Cytnx with HPTT" OFF) option(RUN_TESTS "Run Cytnx tests" OFF) option(USE_CUTT "Build Cytnx with CUTT" OFF) -option(USE_MAGMA "Build Cytnx with MAGMA (requires CUDA)" ON) option(USE_CUTENSOR "Build Cytnx with CuTensor (requires CUDA)" ON) option(USE_CUQUANTUM "Build Cytnx with CUQuantum (requires CUDA)" ON) diff --git a/CytnxBKNDCMakeLists.cmake b/CytnxBKNDCMakeLists.cmake index 21213ed5..a4adbafb 100644 --- a/CytnxBKNDCMakeLists.cmake +++ b/CytnxBKNDCMakeLists.cmake @@ -169,26 +169,6 @@ if(USE_CUDA) endif() - if(USE_MAGMA) - find_package( MAGMA REQUIRED) - if(NOT MAGMA_FOUND) - message(FATAL_ERROR "MAGMA not found!") - endif() - message(STATUS "^^^magma root aft: ${MAGMA_ROOT}") - message(STATUS "^^^magma inc dr: ${MAGMA_INCLUDE_DIRS}") - message(STATUS "^^^magma lib dr: ${MAGMA_LIBRARY_DIRS}") - message(STATUS "^^^magma libs: ${MAGMA_LIBRARIES}") - #add_dependencies(cytnx magma) - target_include_directories(cytnx PRIVATE ${MAGMA_INCLUDE_DIRS}) - target_compile_definitions(cytnx PRIVATE UNI_MAGMA) - target_link_libraries(cytnx PUBLIC ${MAGMA_LIBRARIES}) - - message( STATUS "Build with MAGMA: YES") - FILE(APPEND "${CMAKE_BINARY_DIR}/cxxflags.tmp" "-DUNI_MAGMA\n" "") - FILE(APPEND "${CMAKE_BINARY_DIR}/cxxflags.tmp" "-I${MAGMA_INCLUDE_DIRS}\n" "") - FILE(APPEND "${CMAKE_BINARY_DIR}/linkflags.tmp" "${MAGMA_LIBRARIES} -ldl\n" "") # use > to indicate 
special rt processing - message( STATUS "MAGMA: libdir:${MAGMA_LIBRARY_DIRS} incdir:${MAGMA_INCLUDE_DIRS} libs:${MAGMA_LIBRARIES}") - endif() message( STATUS " Build CUDA Support: YES") message( STATUS " - CUDA Version: ${CUDA_VERSION_STRING}") From 35c95aa2296d8a37afb56dab50dddbb38d04070d Mon Sep 17 00:00:00 2001 From: jeffry1829 Date: Tue, 1 Oct 2024 16:23:02 +0800 Subject: [PATCH 5/6] fix (cu)Det error when U is singular & fix Det output device to gpu when original tensor is in gpu & add tests for Det, make gpu test for Directsum actually happens at gpu --- .../linalg_internal_cpu/Det_internal.cpp | 20 ++- .../linalg_internal_gpu/cuDet_internal.cu | 20 ++- src/linalg/Det.cpp | 4 +- tests/CMakeLists.txt | 1 + tests/gpu/CMakeLists.txt | 1 + tests/gpu/linalg_test/Det_test.cpp | 161 ++++++++++++++++++ tests/gpu/linalg_test/Directsum_test.cpp | 2 +- tests/gpu/test_tools.h | 4 +- tests/linalg_test/Det_test.cpp | 158 +++++++++++++++++ 9 files changed, 358 insertions(+), 13 deletions(-) create mode 100644 tests/gpu/linalg_test/Det_test.cpp create mode 100644 tests/linalg_test/Det_test.cpp diff --git a/src/backend/linalg_internal_cpu/Det_internal.cpp b/src/backend/linalg_internal_cpu/Det_internal.cpp index c507059e..c0bdfd14 100644 --- a/src/backend/linalg_internal_cpu/Det_internal.cpp +++ b/src/backend/linalg_internal_cpu/Det_internal.cpp @@ -27,8 +27,9 @@ namespace cytnx { lapack_int N = L; lapack_int info; info = LAPACKE_zgetrf(LAPACK_COL_MAJOR, N, N, _Rin, N, ipiv); + // If the info > 0, that means the U factor is exactly singular, and the det is 0. cytnx_error_msg( - info != 0, "%s %d", + info < 0, "%s %d", "[ERROR][Det_internal] Error in Lapack function 'zgetrf': Lapack INFO = ", info); // Whether lapack_complex_T is defined as std::complex (c++ complex) or T _Complex @@ -51,6 +52,8 @@ namespace cytnx { free(ipiv); free(_Rin); if (neg) od[0] *= -1; + + if (info > 0) od[0] = 0; } void Det_internal_cf(void *out, const boost::intrusive_ptr &Rin, const cytnx_uint64 &L) { @@ -63,8 +66,9 @@ namespace cytnx { lapack_int N = L; lapack_int info; info = LAPACKE_cgetrf(LAPACK_COL_MAJOR, N, N, _Rin, N, ipiv); + // If the info > 0, that means the U factor is exactly singular, and the det is 0. cytnx_error_msg( - info != 0, "%s %d", + info < 0, "%s %d", "[ERROR][Det_internal] Error in Lapack function 'cgetrf': Lapack INFO = ", info); // Whether lapack_complex_T is defined as std::complex (c++ complex) or T _Complex @@ -87,6 +91,8 @@ namespace cytnx { free(ipiv); free(_Rin); if (neg) od[0] *= -1; + + if (info > 0) od[0] = 0; } void Det_internal_d(void *out, const boost::intrusive_ptr &Rin, const cytnx_uint64 &L) { @@ -98,8 +104,9 @@ namespace cytnx { lapack_int N = L; lapack_int info; info = LAPACKE_dgetrf(LAPACK_COL_MAJOR, N, N, _Rin, N, ipiv); + // If the info > 0, that means the U factor is exactly singular, and the det is 0. cytnx_error_msg( - info != 0, "%s %d", + info < 0, "%s %d", "[ERROR][Det_internal] Error in Lapack function 'dgetrf': Lapack INFO = ", info); od[0] = 1; bool neg = 0; @@ -110,6 +117,8 @@ namespace cytnx { free(ipiv); free(_Rin); if (neg) od[0] *= -1; + + if (info > 0) od[0] = 0; } void Det_internal_f(void *out, const boost::intrusive_ptr &Rin, const cytnx_uint64 &L) { @@ -121,8 +130,9 @@ namespace cytnx { lapack_int N = L; lapack_int info; info = LAPACKE_sgetrf(LAPACK_COL_MAJOR, N, N, _Rin, N, ipiv); + // If the info > 0, that means the U factor is exactly singular, and the det is 0. 
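        // (info < 0 would mean an invalid argument was passed to getrf, which is still treated
        //  as a hard error below.)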
cytnx_error_msg( - info != 0, "%s %d", + info < 0, "%s %d", "[ERROR][Det_internal] Error in Lapack function 'sgetrf': Lapack INFO = ", info); od[0] = 1; bool neg = 0; @@ -133,6 +143,8 @@ namespace cytnx { free(ipiv); free(_Rin); if (neg) od[0] *= -1; + + if (info > 0) od[0] = 0; } } // namespace linalg_internal diff --git a/src/backend/linalg_internal_gpu/cuDet_internal.cu b/src/backend/linalg_internal_gpu/cuDet_internal.cu index 7cf02265..5d8a5c81 100644 --- a/src/backend/linalg_internal_gpu/cuDet_internal.cu +++ b/src/backend/linalg_internal_gpu/cuDet_internal.cu @@ -34,7 +34,8 @@ namespace cytnx { int info; checkCudaErrors(cudaMemcpy(&info, devInfo, sizeof(int), cudaMemcpyDeviceToHost)); - cytnx_error_msg(info != 0, "[ERROR] cusolverDnZgetrf fail with info= %d\n", info); + // If the info > 0, that means the U factor is exactly singular, and the det is 0. + cytnx_error_msg(info < 0, "[ERROR] cusolverDnZgetrf fail with info= %d\n", info); // since we do unify mem, direct access element is possible: od[0] = 1; @@ -52,6 +53,8 @@ namespace cytnx { cudaFree(_in); cusolverDnDestroy(cusolverH); if (neg) od[0] *= -1; + + if (info > 0) od[0] = 0; } void cuDet_internal_cf(void* out, const boost::intrusive_ptr& in, @@ -79,7 +82,8 @@ namespace cytnx { int info; checkCudaErrors(cudaMemcpy(&info, devInfo, sizeof(int), cudaMemcpyDeviceToHost)); - cytnx_error_msg(info != 0, "[ERROR] cusolverDnCgetrf fail with info= %d\n", info); + // If the info > 0, that means the U factor is exactly singular, and the det is 0. + cytnx_error_msg(info < 0, "[ERROR] cusolverDnCgetrf fail with info= %d\n", info); // since we do unify mem, direct access element is possible: od[0] = 1; @@ -97,6 +101,8 @@ namespace cytnx { cudaFree(_in); cusolverDnDestroy(cusolverH); if (neg) od[0] *= -1; + + if (info > 0) od[0] = 0; } void cuDet_internal_d(void* out, const boost::intrusive_ptr& in, @@ -124,7 +130,8 @@ namespace cytnx { int info; checkCudaErrors(cudaMemcpy(&info, devInfo, sizeof(int), cudaMemcpyDeviceToHost)); - cytnx_error_msg(info != 0, "[ERROR] cusolverDnDgetrf fail with info= %d\n", info); + // If the info > 0, that means the U factor is exactly singular, and the det is 0. + cytnx_error_msg(info < 0, "[ERROR] cusolverDnDgetrf fail with info= %d\n", info); // since we do unify mem, direct access element is possible: od[0] = 1; @@ -142,6 +149,8 @@ namespace cytnx { cudaFree(_in); cusolverDnDestroy(cusolverH); if (neg) od[0] *= -1; + + if (info > 0) od[0] = 0; } void cuDet_internal_f(void* out, const boost::intrusive_ptr& in, @@ -169,7 +178,8 @@ namespace cytnx { int info; checkCudaErrors(cudaMemcpy(&info, devInfo, sizeof(int), cudaMemcpyDeviceToHost)); - cytnx_error_msg(info != 0, "[ERROR] cusolverDnSgetrf fail with info= %d\n", info); + // If the info > 0, that means the U factor is exactly singular, and the det is 0. + cytnx_error_msg(info < 0, "[ERROR] cusolverDnSgetrf fail with info= %d\n", info); // since we do unify mem, direct access element is possible: od[0] = 1; @@ -187,6 +197,8 @@ namespace cytnx { cudaFree(_in); cusolverDnDestroy(cusolverH); if (neg) od[0] *= -1; + + if (info > 0) od[0] = 0; } } // namespace linalg_internal diff --git a/src/linalg/Det.cpp b/src/linalg/Det.cpp index 938d95e6..a0566699 100644 --- a/src/linalg/Det.cpp +++ b/src/linalg/Det.cpp @@ -22,9 +22,9 @@ namespace cytnx { if (Tl.dtype() > 4) { // do conversion: _tl = _tl.astype(Type.Double); - out.Init({1}, Type.Double, Device.cpu); // scalar, so on cpu always! 
+ out.Init({1}, Type.Double, _tl.device()); } else { - out.Init({1}, _tl.dtype(), Device.cpu); // scalar, so on cpu always! + out.Init({1}, _tl.dtype(), _tl.device()); } if (Tl.device() == Device.cpu) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1c1b1a40..b665d93c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -30,6 +30,7 @@ add_executable( linalg_test/Svd_test.cpp linalg_test/GeSvd_test.cpp linalg_test/linalg_test.cpp + linalg_test/Det_test.cpp algo_test/Hsplit_test.cpp algo_test/Hstack_test.cpp algo_test/Vsplit_test.cpp diff --git a/tests/gpu/CMakeLists.txt b/tests/gpu/CMakeLists.txt index a76fdc6a..116718f1 100644 --- a/tests/gpu/CMakeLists.txt +++ b/tests/gpu/CMakeLists.txt @@ -22,6 +22,7 @@ add_executable( linalg_test/Svd_test.cpp linalg_test/GeSvd_test.cpp linalg_test/linalg_test.cpp + linalg_test/Det_test.cpp algo_test/Hsplit_test.cpp algo_test/Hstack_test.cpp algo_test/Vsplit_test.cpp diff --git a/tests/gpu/linalg_test/Det_test.cpp b/tests/gpu/linalg_test/Det_test.cpp new file mode 100644 index 00000000..d941a935 --- /dev/null +++ b/tests/gpu/linalg_test/Det_test.cpp @@ -0,0 +1,161 @@ +#include "cytnx.hpp" +#include +#include "../test_tools.h" + +using namespace cytnx; +using namespace testing; +using namespace TestTools; + +namespace DetTest { + + static cytnx_uint64 rand_seed1, rand_seed2; + + void ExcuteDetTest(const Tensor& T); + + void ErrorTestExcute(const Tensor& T); + + /*=====test info===== + describe:Test all possible data type and check the results. + input: + T:Tensor with shape {6, 6} or {2, 2}, {1, 1}, {3, 3}, {4, 4} and test for all + possilbe data type. + ====================*/ + TEST(Det, allDType) { + for (auto device : device_list) { + for (auto dtype1 : dtype_list) { + for (auto dtype2 : dtype_list) { + // The GPU version of Det is just too slow. + // So we lower the size of the tensor from 6,6 to 3,3. + Tensor T = Tensor({3, 3}, dtype1, device); + InitTensorUniform(T, rand_seed1 = 3); + ExcuteDetTest(T); + + T = Tensor({2, 2}, dtype1, device); + InitTensorUniform(T, rand_seed1 = 3); + ExcuteDetTest(T); + + T = Tensor({1, 1}, dtype1, device); + InitTensorUniform(T, rand_seed1 = 3); + ExcuteDetTest(T); + + T = Tensor({3, 3}, dtype1, device); + InitTensorUniform(T, rand_seed1 = 3); + ExcuteDetTest(T); + + T = Tensor({4, 4}, dtype1, device); + InitTensorUniform(T, rand_seed1 = 3); + ExcuteDetTest(T); + } + } + } + } + + /*=====test info===== + describe:Test the tensor only 1 have one element. Test for all possible data type. + input: + T:Tensor with shape {1} on cpu, testing for all possible data type. + ====================*/ + TEST(Det, one_elem_tens) { + for (auto dtype : dtype_list) { + Tensor T = Tensor({1, 1}, dtype); + InitTensorUniform(T, rand_seed1 = 3); + ExcuteDetTest(T); + } + } + + /*=====test info===== + describe:Test for not contiguous tensor. + input: + T:Double data type not contiguous tensor with shape {9,9} on cpu. + ====================*/ + TEST(Det, not_contiguous) { + Tensor T = Tensor({9, 9}, Type.Double); + InitTensorUniform(T, rand_seed1 = 1); + // permute then they will not contiguous + T.permute_({1, 0}); // shape:[9,9] + ExcuteDetTest(T); + } + + // error test + /*=====test info===== + describe:Test the input tensors are both void tensor. + input: + T:void tensor + ====================*/ + TEST(Det, err_void_tens) { + Tensor T = Tensor(); + ErrorTestExcute(T); + } + + /*=====test info===== + describe:Test contains shared axis of the tensors are not same. 
+ input: + T:double type tensor with shape {2, 3} on cpu. + ====================*/ + TEST(Det, err_axis_dim_wrong) { + Tensor T = Tensor({2, 3}); + InitTensorUniform(T, rand_seed1 = 0); + ErrorTestExcute(T); + } + + Tensor ConstructExpectTens(const Tensor& T) { + Tensor dst_T = zeros(1, T.dtype(), T.device()); + int n = T.shape()[0]; + cytnx_error_msg( + T.shape().size() != 2, + "[ERROR] [Det_test] [ConstructExpectTens]The input tensor should be rank-2 tensor.%s", "\n"); + for (auto s : T.shape()) { + cytnx_error_msg( + s != n, + "[ERROR] [Det_test] [ConstructExpectTens]The input tensor should be square matrix.%s", + "\n"); + } + if (n == 1) { + dst_T.at({0}) = T.at({0, 0}); + return dst_T; + } else if (n == 2) { + dst_T.at({0}) = T.at({0, 0}) * T.at({1, 1}) - T.at({0, 1}) * T.at({1, 0}); + return dst_T; + } + for (cytnx_uint64 a = 0; a < n; a++) { + Tensor T2 = zeros({T.shape()[0] - 1, T.shape()[1] - 1}, T.dtype(), T.device()); + for (cytnx_uint64 i = 0; i < n - 1; i++) { + for (cytnx_uint64 j = 0; j < n - 1; j++) { + cytnx_uint64 ii = i + (i >= 0); + cytnx_uint64 jj = j + (j >= a); + T2.at({i, j}) = T.at({ii, jj}); + } + } + dst_T({0}) += (a % 2 == 0 ? 1 : -1) * T.at({0, a}) * ConstructExpectTens(T2); + } + return dst_T; + } + + void ExcuteDetTest(const Tensor& T) { + Tensor det_T = linalg::Det(T); + Tensor expect_T; + expect_T = + ConstructExpectTens(T.dtype() > 4 ? T.astype(Type.Double).to(-1) : T.to(-1)).to(T.device()); + // std::cout << det_T << expect_T << std::endl; + const double tolerance = + std::pow(10.0, std::max((int)std::abs(std::log10(det_T(0).item())) - 6, 0)); + // Because the determinant will be casted to at least double, so we just cast the result to + // ComplexDouble for comparison. + std::cout << "Type: " << Type.getname(T.dtype()) << std::endl; + EXPECT_TRUE(AreNearlyEqTensor(det_T.astype(Type.ComplexDouble), + expect_T.astype(Type.ComplexDouble), tolerance)); + } + + void ErrorTestExcute(const Tensor& T) { + try { + auto dirsum_T = linalg::Det(T); + std::cerr << "[Test Error] This test should throw error but not !" << std::endl; + FAIL(); + } catch (const std::exception& ex) { + auto err_msg = ex.what(); + std::cerr << err_msg << std::endl; + SUCCEED(); + } + } + +} // namespace DetTest diff --git a/tests/gpu/linalg_test/Directsum_test.cpp b/tests/gpu/linalg_test/Directsum_test.cpp index a5f65e47..947961da 100644 --- a/tests/gpu/linalg_test/Directsum_test.cpp +++ b/tests/gpu/linalg_test/Directsum_test.cpp @@ -24,7 +24,7 @@ namespace DirectsumTest { axes:{1} ====================*/ TEST(Directsum, gpu_allDType) { - for (auto device : device_list) { // now only test for cpu device. 
+ for (auto device : device_list) { for (auto dtype1 : dtype_list) { for (auto dtype2 : dtype_list) { Tensor T1 = Tensor({12, 5, 7}, dtype1, device); diff --git a/tests/gpu/test_tools.h b/tests/gpu/test_tools.h index 81edfa3c..d4871289 100644 --- a/tests/gpu/test_tools.h +++ b/tests/gpu/test_tools.h @@ -35,8 +35,8 @@ namespace TestTools { Type.Int32, Type.Uint32, Type.Int16, Type.Uint16, Type.Bool}; static std::vector device_list = { - Device.cpu, - // Device.cuda, //currently cuda version still not implement + // Device.cpu, + Device.cuda, }; // Tensor tools diff --git a/tests/linalg_test/Det_test.cpp b/tests/linalg_test/Det_test.cpp new file mode 100644 index 00000000..f40d168c --- /dev/null +++ b/tests/linalg_test/Det_test.cpp @@ -0,0 +1,158 @@ +#include "cytnx.hpp" +#include +#include "../test_tools.h" + +using namespace cytnx; +using namespace testing; +using namespace TestTools; + +namespace DetTest { + + static cytnx_uint64 rand_seed1, rand_seed2; + + void ExcuteDetTest(const Tensor& T); + + void ErrorTestExcute(const Tensor& T); + + /*=====test info===== + describe:Test all possible data type and check the results. + input: + T:Tensor with shape {6, 6} or {2, 2}, {1, 1}, {3, 3}, {4, 4} and test for all + possilbe data type. + ====================*/ + TEST(Det, allDType) { + for (auto device : device_list) { // now only test for cpu device. + for (auto dtype1 : dtype_list) { + for (auto dtype2 : dtype_list) { + Tensor T = Tensor({6, 6}, dtype1, device); + InitTensorUniform(T, rand_seed1 = 3); + ExcuteDetTest(T); + + T = Tensor({2, 2}, dtype1, device); + InitTensorUniform(T, rand_seed1 = 3); + ExcuteDetTest(T); + + T = Tensor({1, 1}, dtype1, device); + InitTensorUniform(T, rand_seed1 = 3); + ExcuteDetTest(T); + + T = Tensor({3, 3}, dtype1, device); + InitTensorUniform(T, rand_seed1 = 3); + ExcuteDetTest(T); + + T = Tensor({4, 4}, dtype1, device); + InitTensorUniform(T, rand_seed1 = 3); + ExcuteDetTest(T); + } + } + } + } + + /*=====test info===== + describe:Test the tensor only 1 have one element. Test for all possible data type. + input: + T:Tensor with shape {1} on cpu, testing for all possible data type. + ====================*/ + TEST(Det, one_elem_tens) { + for (auto dtype : dtype_list) { + Tensor T = Tensor({1, 1}, dtype); + InitTensorUniform(T, rand_seed1 = 3); + ExcuteDetTest(T); + } + } + + /*=====test info===== + describe:Test for not contiguous tensor. + input: + T:Double data type not contiguous tensor with shape {9,9} on cpu. + ====================*/ + TEST(Det, not_contiguous) { + Tensor T = Tensor({9, 9}, Type.Double); + InitTensorUniform(T, rand_seed1 = 1); + // permute then they will not contiguous + T.permute_({1, 0}); // shape:[9,9] + ExcuteDetTest(T); + } + + // error test + /*=====test info===== + describe:Test the input tensors are both void tensor. + input: + T:void tensor + ====================*/ + TEST(Det, err_void_tens) { + Tensor T = Tensor(); + ErrorTestExcute(T); + } + + /*=====test info===== + describe:Test contains shared axis of the tensors are not same. + input: + T:double type tensor with shape {2, 3} on cpu. 
+ ====================*/ + TEST(Det, err_axis_dim_wrong) { + Tensor T = Tensor({2, 3}); + InitTensorUniform(T, rand_seed1 = 0); + ErrorTestExcute(T); + } + + Tensor ConstructExpectTens(const Tensor& T) { + Tensor dst_T = zeros(1, T.dtype(), T.device()); + int n = T.shape()[0]; + cytnx_error_msg( + T.shape().size() != 2, + "[ERROR] [Det_test] [ConstructExpectTens]The input tensor should be rank-2 tensor.%s", "\n"); + for (auto s : T.shape()) { + cytnx_error_msg( + s != n, + "[ERROR] [Det_test] [ConstructExpectTens]The input tensor should be square matrix.%s", + "\n"); + } + if (n == 1) { + dst_T.at({0}) = T.at({0, 0}); + return dst_T; + } else if (n == 2) { + dst_T.at({0}) = T.at({0, 0}) * T.at({1, 1}) - T.at({0, 1}) * T.at({1, 0}); + return dst_T; + } + for (cytnx_uint64 a = 0; a < n; a++) { + Tensor T2 = zeros({T.shape()[0] - 1, T.shape()[1] - 1}, T.dtype(), T.device()); + for (cytnx_uint64 i = 0; i < n - 1; i++) { + for (cytnx_uint64 j = 0; j < n - 1; j++) { + cytnx_uint64 ii = i + (i >= 0); + cytnx_uint64 jj = j + (j >= a); + T2.at({i, j}) = T.at({ii, jj}); + } + } + dst_T({0}) += (a % 2 == 0 ? 1 : -1) * T.at({0, a}) * ConstructExpectTens(T2); + } + return dst_T; + } + + void ExcuteDetTest(const Tensor& T) { + Tensor det_T = linalg::Det(T); + Tensor expect_T; + expect_T = ConstructExpectTens(T.dtype() > 4 ? T.astype(Type.Double) : T); + // std::cout << det_T << expect_T << std::endl; + const double tolerance = + std::pow(10.0, std::max((int)std::abs(std::log10(det_T(0).item())) - 6, 0)); + // Because the determinant will be casted to at least double, so we just cast the result to + // ComplexDouble for comparison. + std::cout << "Type: " << Type.getname(T.dtype()) << std::endl; + EXPECT_TRUE(AreNearlyEqTensor(det_T.astype(Type.ComplexDouble), + expect_T.astype(Type.ComplexDouble), tolerance)); + } + + void ErrorTestExcute(const Tensor& T) { + try { + auto dirsum_T = linalg::Det(T); + std::cerr << "[Test Error] This test should throw error but not !" << std::endl; + FAIL(); + } catch (const std::exception& ex) { + auto err_msg = ex.what(); + std::cerr << err_msg << std::endl; + SUCCEED(); + } + } + +} // namespace DetTest From cd22a1c5a3326dd2a7315326e7527ac7855563dd Mon Sep 17 00:00:00 2001 From: jeffry1829 Date: Wed, 2 Oct 2024 19:05:22 +0800 Subject: [PATCH 6/6] Revert "fix (cu)Det error when U is singular &" This reverts commit 35c95aa2296d8a37afb56dab50dddbb38d04070d. --- .../linalg_internal_cpu/Det_internal.cpp | 20 +-- .../linalg_internal_gpu/cuDet_internal.cu | 20 +-- src/linalg/Det.cpp | 4 +- tests/CMakeLists.txt | 1 - tests/gpu/CMakeLists.txt | 1 - tests/gpu/linalg_test/Det_test.cpp | 161 ------------------ tests/gpu/linalg_test/Directsum_test.cpp | 2 +- tests/gpu/test_tools.h | 4 +- tests/linalg_test/Det_test.cpp | 158 ----------------- 9 files changed, 13 insertions(+), 358 deletions(-) delete mode 100644 tests/gpu/linalg_test/Det_test.cpp delete mode 100644 tests/linalg_test/Det_test.cpp diff --git a/src/backend/linalg_internal_cpu/Det_internal.cpp b/src/backend/linalg_internal_cpu/Det_internal.cpp index c0bdfd14..c507059e 100644 --- a/src/backend/linalg_internal_cpu/Det_internal.cpp +++ b/src/backend/linalg_internal_cpu/Det_internal.cpp @@ -27,9 +27,8 @@ namespace cytnx { lapack_int N = L; lapack_int info; info = LAPACKE_zgetrf(LAPACK_COL_MAJOR, N, N, _Rin, N, ipiv); - // If the info > 0, that means the U factor is exactly singular, and the det is 0. 
cytnx_error_msg( - info < 0, "%s %d", + info != 0, "%s %d", "[ERROR][Det_internal] Error in Lapack function 'zgetrf': Lapack INFO = ", info); // Whether lapack_complex_T is defined as std::complex (c++ complex) or T _Complex @@ -52,8 +51,6 @@ namespace cytnx { free(ipiv); free(_Rin); if (neg) od[0] *= -1; - - if (info > 0) od[0] = 0; } void Det_internal_cf(void *out, const boost::intrusive_ptr &Rin, const cytnx_uint64 &L) { @@ -66,9 +63,8 @@ namespace cytnx { lapack_int N = L; lapack_int info; info = LAPACKE_cgetrf(LAPACK_COL_MAJOR, N, N, _Rin, N, ipiv); - // If the info > 0, that means the U factor is exactly singular, and the det is 0. cytnx_error_msg( - info < 0, "%s %d", + info != 0, "%s %d", "[ERROR][Det_internal] Error in Lapack function 'cgetrf': Lapack INFO = ", info); // Whether lapack_complex_T is defined as std::complex (c++ complex) or T _Complex @@ -91,8 +87,6 @@ namespace cytnx { free(ipiv); free(_Rin); if (neg) od[0] *= -1; - - if (info > 0) od[0] = 0; } void Det_internal_d(void *out, const boost::intrusive_ptr &Rin, const cytnx_uint64 &L) { @@ -104,9 +98,8 @@ namespace cytnx { lapack_int N = L; lapack_int info; info = LAPACKE_dgetrf(LAPACK_COL_MAJOR, N, N, _Rin, N, ipiv); - // If the info > 0, that means the U factor is exactly singular, and the det is 0. cytnx_error_msg( - info < 0, "%s %d", + info != 0, "%s %d", "[ERROR][Det_internal] Error in Lapack function 'dgetrf': Lapack INFO = ", info); od[0] = 1; bool neg = 0; @@ -117,8 +110,6 @@ namespace cytnx { free(ipiv); free(_Rin); if (neg) od[0] *= -1; - - if (info > 0) od[0] = 0; } void Det_internal_f(void *out, const boost::intrusive_ptr &Rin, const cytnx_uint64 &L) { @@ -130,9 +121,8 @@ namespace cytnx { lapack_int N = L; lapack_int info; info = LAPACKE_sgetrf(LAPACK_COL_MAJOR, N, N, _Rin, N, ipiv); - // If the info > 0, that means the U factor is exactly singular, and the det is 0. cytnx_error_msg( - info < 0, "%s %d", + info != 0, "%s %d", "[ERROR][Det_internal] Error in Lapack function 'sgetrf': Lapack INFO = ", info); od[0] = 1; bool neg = 0; @@ -143,8 +133,6 @@ namespace cytnx { free(ipiv); free(_Rin); if (neg) od[0] *= -1; - - if (info > 0) od[0] = 0; } } // namespace linalg_internal diff --git a/src/backend/linalg_internal_gpu/cuDet_internal.cu b/src/backend/linalg_internal_gpu/cuDet_internal.cu index 5d8a5c81..7cf02265 100644 --- a/src/backend/linalg_internal_gpu/cuDet_internal.cu +++ b/src/backend/linalg_internal_gpu/cuDet_internal.cu @@ -34,8 +34,7 @@ namespace cytnx { int info; checkCudaErrors(cudaMemcpy(&info, devInfo, sizeof(int), cudaMemcpyDeviceToHost)); - // If the info > 0, that means the U factor is exactly singular, and the det is 0. - cytnx_error_msg(info < 0, "[ERROR] cusolverDnZgetrf fail with info= %d\n", info); + cytnx_error_msg(info != 0, "[ERROR] cusolverDnZgetrf fail with info= %d\n", info); // since we do unify mem, direct access element is possible: od[0] = 1; @@ -53,8 +52,6 @@ namespace cytnx { cudaFree(_in); cusolverDnDestroy(cusolverH); if (neg) od[0] *= -1; - - if (info > 0) od[0] = 0; } void cuDet_internal_cf(void* out, const boost::intrusive_ptr& in, @@ -82,8 +79,7 @@ namespace cytnx { int info; checkCudaErrors(cudaMemcpy(&info, devInfo, sizeof(int), cudaMemcpyDeviceToHost)); - // If the info > 0, that means the U factor is exactly singular, and the det is 0. 
- cytnx_error_msg(info < 0, "[ERROR] cusolverDnCgetrf fail with info= %d\n", info); + cytnx_error_msg(info != 0, "[ERROR] cusolverDnCgetrf fail with info= %d\n", info); // since we do unify mem, direct access element is possible: od[0] = 1; @@ -101,8 +97,6 @@ namespace cytnx { cudaFree(_in); cusolverDnDestroy(cusolverH); if (neg) od[0] *= -1; - - if (info > 0) od[0] = 0; } void cuDet_internal_d(void* out, const boost::intrusive_ptr& in, @@ -130,8 +124,7 @@ namespace cytnx { int info; checkCudaErrors(cudaMemcpy(&info, devInfo, sizeof(int), cudaMemcpyDeviceToHost)); - // If the info > 0, that means the U factor is exactly singular, and the det is 0. - cytnx_error_msg(info < 0, "[ERROR] cusolverDnDgetrf fail with info= %d\n", info); + cytnx_error_msg(info != 0, "[ERROR] cusolverDnDgetrf fail with info= %d\n", info); // since we do unify mem, direct access element is possible: od[0] = 1; @@ -149,8 +142,6 @@ namespace cytnx { cudaFree(_in); cusolverDnDestroy(cusolverH); if (neg) od[0] *= -1; - - if (info > 0) od[0] = 0; } void cuDet_internal_f(void* out, const boost::intrusive_ptr& in, @@ -178,8 +169,7 @@ namespace cytnx { int info; checkCudaErrors(cudaMemcpy(&info, devInfo, sizeof(int), cudaMemcpyDeviceToHost)); - // If the info > 0, that means the U factor is exactly singular, and the det is 0. - cytnx_error_msg(info < 0, "[ERROR] cusolverDnSgetrf fail with info= %d\n", info); + cytnx_error_msg(info != 0, "[ERROR] cusolverDnSgetrf fail with info= %d\n", info); // since we do unify mem, direct access element is possible: od[0] = 1; @@ -197,8 +187,6 @@ namespace cytnx { cudaFree(_in); cusolverDnDestroy(cusolverH); if (neg) od[0] *= -1; - - if (info > 0) od[0] = 0; } } // namespace linalg_internal diff --git a/src/linalg/Det.cpp b/src/linalg/Det.cpp index a0566699..938d95e6 100644 --- a/src/linalg/Det.cpp +++ b/src/linalg/Det.cpp @@ -22,9 +22,9 @@ namespace cytnx { if (Tl.dtype() > 4) { // do conversion: _tl = _tl.astype(Type.Double); - out.Init({1}, Type.Double, _tl.device()); + out.Init({1}, Type.Double, Device.cpu); // scalar, so on cpu always! } else { - out.Init({1}, _tl.dtype(), _tl.device()); + out.Init({1}, _tl.dtype(), Device.cpu); // scalar, so on cpu always! 
} if (Tl.device() == Device.cpu) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b665d93c..1c1b1a40 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -30,7 +30,6 @@ add_executable( linalg_test/Svd_test.cpp linalg_test/GeSvd_test.cpp linalg_test/linalg_test.cpp - linalg_test/Det_test.cpp algo_test/Hsplit_test.cpp algo_test/Hstack_test.cpp algo_test/Vsplit_test.cpp diff --git a/tests/gpu/CMakeLists.txt b/tests/gpu/CMakeLists.txt index 116718f1..a76fdc6a 100644 --- a/tests/gpu/CMakeLists.txt +++ b/tests/gpu/CMakeLists.txt @@ -22,7 +22,6 @@ add_executable( linalg_test/Svd_test.cpp linalg_test/GeSvd_test.cpp linalg_test/linalg_test.cpp - linalg_test/Det_test.cpp algo_test/Hsplit_test.cpp algo_test/Hstack_test.cpp algo_test/Vsplit_test.cpp diff --git a/tests/gpu/linalg_test/Det_test.cpp b/tests/gpu/linalg_test/Det_test.cpp deleted file mode 100644 index d941a935..00000000 --- a/tests/gpu/linalg_test/Det_test.cpp +++ /dev/null @@ -1,161 +0,0 @@ -#include "cytnx.hpp" -#include -#include "../test_tools.h" - -using namespace cytnx; -using namespace testing; -using namespace TestTools; - -namespace DetTest { - - static cytnx_uint64 rand_seed1, rand_seed2; - - void ExcuteDetTest(const Tensor& T); - - void ErrorTestExcute(const Tensor& T); - - /*=====test info===== - describe:Test all possible data type and check the results. - input: - T:Tensor with shape {6, 6} or {2, 2}, {1, 1}, {3, 3}, {4, 4} and test for all - possilbe data type. - ====================*/ - TEST(Det, allDType) { - for (auto device : device_list) { - for (auto dtype1 : dtype_list) { - for (auto dtype2 : dtype_list) { - // The GPU version of Det is just too slow. - // So we lower the size of the tensor from 6,6 to 3,3. - Tensor T = Tensor({3, 3}, dtype1, device); - InitTensorUniform(T, rand_seed1 = 3); - ExcuteDetTest(T); - - T = Tensor({2, 2}, dtype1, device); - InitTensorUniform(T, rand_seed1 = 3); - ExcuteDetTest(T); - - T = Tensor({1, 1}, dtype1, device); - InitTensorUniform(T, rand_seed1 = 3); - ExcuteDetTest(T); - - T = Tensor({3, 3}, dtype1, device); - InitTensorUniform(T, rand_seed1 = 3); - ExcuteDetTest(T); - - T = Tensor({4, 4}, dtype1, device); - InitTensorUniform(T, rand_seed1 = 3); - ExcuteDetTest(T); - } - } - } - } - - /*=====test info===== - describe:Test the tensor only 1 have one element. Test for all possible data type. - input: - T:Tensor with shape {1} on cpu, testing for all possible data type. - ====================*/ - TEST(Det, one_elem_tens) { - for (auto dtype : dtype_list) { - Tensor T = Tensor({1, 1}, dtype); - InitTensorUniform(T, rand_seed1 = 3); - ExcuteDetTest(T); - } - } - - /*=====test info===== - describe:Test for not contiguous tensor. - input: - T:Double data type not contiguous tensor with shape {9,9} on cpu. - ====================*/ - TEST(Det, not_contiguous) { - Tensor T = Tensor({9, 9}, Type.Double); - InitTensorUniform(T, rand_seed1 = 1); - // permute then they will not contiguous - T.permute_({1, 0}); // shape:[9,9] - ExcuteDetTest(T); - } - - // error test - /*=====test info===== - describe:Test the input tensors are both void tensor. - input: - T:void tensor - ====================*/ - TEST(Det, err_void_tens) { - Tensor T = Tensor(); - ErrorTestExcute(T); - } - - /*=====test info===== - describe:Test contains shared axis of the tensors are not same. - input: - T:double type tensor with shape {2, 3} on cpu. 
- ====================*/ - TEST(Det, err_axis_dim_wrong) { - Tensor T = Tensor({2, 3}); - InitTensorUniform(T, rand_seed1 = 0); - ErrorTestExcute(T); - } - - Tensor ConstructExpectTens(const Tensor& T) { - Tensor dst_T = zeros(1, T.dtype(), T.device()); - int n = T.shape()[0]; - cytnx_error_msg( - T.shape().size() != 2, - "[ERROR] [Det_test] [ConstructExpectTens]The input tensor should be rank-2 tensor.%s", "\n"); - for (auto s : T.shape()) { - cytnx_error_msg( - s != n, - "[ERROR] [Det_test] [ConstructExpectTens]The input tensor should be square matrix.%s", - "\n"); - } - if (n == 1) { - dst_T.at({0}) = T.at({0, 0}); - return dst_T; - } else if (n == 2) { - dst_T.at({0}) = T.at({0, 0}) * T.at({1, 1}) - T.at({0, 1}) * T.at({1, 0}); - return dst_T; - } - for (cytnx_uint64 a = 0; a < n; a++) { - Tensor T2 = zeros({T.shape()[0] - 1, T.shape()[1] - 1}, T.dtype(), T.device()); - for (cytnx_uint64 i = 0; i < n - 1; i++) { - for (cytnx_uint64 j = 0; j < n - 1; j++) { - cytnx_uint64 ii = i + (i >= 0); - cytnx_uint64 jj = j + (j >= a); - T2.at({i, j}) = T.at({ii, jj}); - } - } - dst_T({0}) += (a % 2 == 0 ? 1 : -1) * T.at({0, a}) * ConstructExpectTens(T2); - } - return dst_T; - } - - void ExcuteDetTest(const Tensor& T) { - Tensor det_T = linalg::Det(T); - Tensor expect_T; - expect_T = - ConstructExpectTens(T.dtype() > 4 ? T.astype(Type.Double).to(-1) : T.to(-1)).to(T.device()); - // std::cout << det_T << expect_T << std::endl; - const double tolerance = - std::pow(10.0, std::max((int)std::abs(std::log10(det_T(0).item())) - 6, 0)); - // Because the determinant will be casted to at least double, so we just cast the result to - // ComplexDouble for comparison. - std::cout << "Type: " << Type.getname(T.dtype()) << std::endl; - EXPECT_TRUE(AreNearlyEqTensor(det_T.astype(Type.ComplexDouble), - expect_T.astype(Type.ComplexDouble), tolerance)); - } - - void ErrorTestExcute(const Tensor& T) { - try { - auto dirsum_T = linalg::Det(T); - std::cerr << "[Test Error] This test should throw error but not !" << std::endl; - FAIL(); - } catch (const std::exception& ex) { - auto err_msg = ex.what(); - std::cerr << err_msg << std::endl; - SUCCEED(); - } - } - -} // namespace DetTest diff --git a/tests/gpu/linalg_test/Directsum_test.cpp b/tests/gpu/linalg_test/Directsum_test.cpp index 947961da..a5f65e47 100644 --- a/tests/gpu/linalg_test/Directsum_test.cpp +++ b/tests/gpu/linalg_test/Directsum_test.cpp @@ -24,7 +24,7 @@ namespace DirectsumTest { axes:{1} ====================*/ TEST(Directsum, gpu_allDType) { - for (auto device : device_list) { + for (auto device : device_list) { // now only test for cpu device. 
for (auto dtype1 : dtype_list) { for (auto dtype2 : dtype_list) { Tensor T1 = Tensor({12, 5, 7}, dtype1, device); diff --git a/tests/gpu/test_tools.h b/tests/gpu/test_tools.h index d4871289..81edfa3c 100644 --- a/tests/gpu/test_tools.h +++ b/tests/gpu/test_tools.h @@ -35,8 +35,8 @@ namespace TestTools { Type.Int32, Type.Uint32, Type.Int16, Type.Uint16, Type.Bool}; static std::vector device_list = { - // Device.cpu, - Device.cuda, + Device.cpu, + // Device.cuda, //currently cuda version still not implement }; // Tensor tools diff --git a/tests/linalg_test/Det_test.cpp b/tests/linalg_test/Det_test.cpp deleted file mode 100644 index f40d168c..00000000 --- a/tests/linalg_test/Det_test.cpp +++ /dev/null @@ -1,158 +0,0 @@ -#include "cytnx.hpp" -#include -#include "../test_tools.h" - -using namespace cytnx; -using namespace testing; -using namespace TestTools; - -namespace DetTest { - - static cytnx_uint64 rand_seed1, rand_seed2; - - void ExcuteDetTest(const Tensor& T); - - void ErrorTestExcute(const Tensor& T); - - /*=====test info===== - describe:Test all possible data type and check the results. - input: - T:Tensor with shape {6, 6} or {2, 2}, {1, 1}, {3, 3}, {4, 4} and test for all - possilbe data type. - ====================*/ - TEST(Det, allDType) { - for (auto device : device_list) { // now only test for cpu device. - for (auto dtype1 : dtype_list) { - for (auto dtype2 : dtype_list) { - Tensor T = Tensor({6, 6}, dtype1, device); - InitTensorUniform(T, rand_seed1 = 3); - ExcuteDetTest(T); - - T = Tensor({2, 2}, dtype1, device); - InitTensorUniform(T, rand_seed1 = 3); - ExcuteDetTest(T); - - T = Tensor({1, 1}, dtype1, device); - InitTensorUniform(T, rand_seed1 = 3); - ExcuteDetTest(T); - - T = Tensor({3, 3}, dtype1, device); - InitTensorUniform(T, rand_seed1 = 3); - ExcuteDetTest(T); - - T = Tensor({4, 4}, dtype1, device); - InitTensorUniform(T, rand_seed1 = 3); - ExcuteDetTest(T); - } - } - } - } - - /*=====test info===== - describe:Test the tensor only 1 have one element. Test for all possible data type. - input: - T:Tensor with shape {1} on cpu, testing for all possible data type. - ====================*/ - TEST(Det, one_elem_tens) { - for (auto dtype : dtype_list) { - Tensor T = Tensor({1, 1}, dtype); - InitTensorUniform(T, rand_seed1 = 3); - ExcuteDetTest(T); - } - } - - /*=====test info===== - describe:Test for not contiguous tensor. - input: - T:Double data type not contiguous tensor with shape {9,9} on cpu. - ====================*/ - TEST(Det, not_contiguous) { - Tensor T = Tensor({9, 9}, Type.Double); - InitTensorUniform(T, rand_seed1 = 1); - // permute then they will not contiguous - T.permute_({1, 0}); // shape:[9,9] - ExcuteDetTest(T); - } - - // error test - /*=====test info===== - describe:Test the input tensors are both void tensor. - input: - T:void tensor - ====================*/ - TEST(Det, err_void_tens) { - Tensor T = Tensor(); - ErrorTestExcute(T); - } - - /*=====test info===== - describe:Test contains shared axis of the tensors are not same. - input: - T:double type tensor with shape {2, 3} on cpu. 
- ====================*/ - TEST(Det, err_axis_dim_wrong) { - Tensor T = Tensor({2, 3}); - InitTensorUniform(T, rand_seed1 = 0); - ErrorTestExcute(T); - } - - Tensor ConstructExpectTens(const Tensor& T) { - Tensor dst_T = zeros(1, T.dtype(), T.device()); - int n = T.shape()[0]; - cytnx_error_msg( - T.shape().size() != 2, - "[ERROR] [Det_test] [ConstructExpectTens]The input tensor should be rank-2 tensor.%s", "\n"); - for (auto s : T.shape()) { - cytnx_error_msg( - s != n, - "[ERROR] [Det_test] [ConstructExpectTens]The input tensor should be square matrix.%s", - "\n"); - } - if (n == 1) { - dst_T.at({0}) = T.at({0, 0}); - return dst_T; - } else if (n == 2) { - dst_T.at({0}) = T.at({0, 0}) * T.at({1, 1}) - T.at({0, 1}) * T.at({1, 0}); - return dst_T; - } - for (cytnx_uint64 a = 0; a < n; a++) { - Tensor T2 = zeros({T.shape()[0] - 1, T.shape()[1] - 1}, T.dtype(), T.device()); - for (cytnx_uint64 i = 0; i < n - 1; i++) { - for (cytnx_uint64 j = 0; j < n - 1; j++) { - cytnx_uint64 ii = i + (i >= 0); - cytnx_uint64 jj = j + (j >= a); - T2.at({i, j}) = T.at({ii, jj}); - } - } - dst_T({0}) += (a % 2 == 0 ? 1 : -1) * T.at({0, a}) * ConstructExpectTens(T2); - } - return dst_T; - } - - void ExcuteDetTest(const Tensor& T) { - Tensor det_T = linalg::Det(T); - Tensor expect_T; - expect_T = ConstructExpectTens(T.dtype() > 4 ? T.astype(Type.Double) : T); - // std::cout << det_T << expect_T << std::endl; - const double tolerance = - std::pow(10.0, std::max((int)std::abs(std::log10(det_T(0).item())) - 6, 0)); - // Because the determinant will be casted to at least double, so we just cast the result to - // ComplexDouble for comparison. - std::cout << "Type: " << Type.getname(T.dtype()) << std::endl; - EXPECT_TRUE(AreNearlyEqTensor(det_T.astype(Type.ComplexDouble), - expect_T.astype(Type.ComplexDouble), tolerance)); - } - - void ErrorTestExcute(const Tensor& T) { - try { - auto dirsum_T = linalg::Det(T); - std::cerr << "[Test Error] This test should throw error but not !" << std::endl; - FAIL(); - } catch (const std::exception& ex) { - auto err_msg = ex.what(); - std::cerr << err_msg << std::endl; - SUCCEED(); - } - } - -} // namespace DetTest
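For reference, the determinant extraction shared by the LAPACK and cuSOLVER paths in the patches above (sign from the pivot array, product of the U diagonal) can be checked in isolation. The following is a minimal host-only C++ sketch with a hypothetical helper name det_from_lu, assuming a getrf-style in-place LU with 1-based pivots:

#include <iostream>
#include <vector>

// det(A) from an in-place LU factorization (getrf convention):
//   lu   : L\U stored densely, diagonal of U at lu[i*n + i]
//   ipiv : 1-based pivots; ipiv[i] != i+1 records a row interchange (sign flip)
template <typename T>
T det_from_lu(const std::vector<T>& lu, const std::vector<int>& ipiv, int n) {
  T det = T(1);
  bool neg = false;
  for (int i = 0; i < n; ++i) {
    det *= lu[i * n + i];
    if (ipiv[i] != i + 1) neg = !neg;
  }
  return neg ? -det : det;
}

int main() {
  // LU of [[0,1],[1,0]] with partial pivoting: one row swap, U = I, so det = -1.
  std::vector<double> lu{1.0, 0.0, 0.0, 1.0};
  std::vector<int> ipiv{2, 2};
  std::cout << det_from_lu(lu, ipiv, 2) << "\n";  // prints -1
  return 0;
}

This mirrors the loop in Det_internal.cpp and cuDet_internal.cu; the GPU variants differ only in that the diagonal and pivot array are first copied (or unified-memory mapped) back to the host.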