diff --git a/src/CUDSS.jl b/src/CUDSS.jl index 121ba16..682c0d9 100644 --- a/src/CUDSS.jl +++ b/src/CUDSS.jl @@ -6,7 +6,7 @@ using LinearAlgebra using SparseArrays import CUDA: @checked, libraryPropertyType, cudaDataType, initialize_context, retry_reclaim, CUstream -import LinearAlgebra: lu, lu!, ldlt, ldlt!, cholesky, cholesky!, ldiv!, BlasFloat +import LinearAlgebra: lu, lu!, ldlt, ldlt!, cholesky, cholesky!, ldiv!, BlasFloat, BlasReal include("libcudss.jl") include("error.jl") diff --git a/src/generic.jl b/src/generic.jl index 39c4706..cf4e28b 100644 --- a/src/generic.jl +++ b/src/generic.jl @@ -8,10 +8,10 @@ function LinearAlgebra.lu(A::CuSparseMatrixCSR{T}) where T <: BlasFloat return solver end -function LinearAlgebra.ldlt(A::CuSparseMatrixCSR{T}) where T <: BlasFloat +function LinearAlgebra.ldlt(A::CuSparseMatrixCSR{T}; view::Char='F') where T <: BlasFloat n = LinearAlgebra.checksquare(A) structure = T <: Real ? "S" : "H" - solver = CudssSolver(A, structure, 'F') + solver = CudssSolver(A, structure, view) (T <: Complex) && cudss_set(solver, "pivot_type", 'N') x = CudssMatrix(T, n) b = CudssMatrix(T, n) @@ -20,10 +20,13 @@ function LinearAlgebra.ldlt(A::CuSparseMatrixCSR{T}) where T <: BlasFloat return solver end -function LinearAlgebra.cholesky(A::CuSparseMatrixCSR{T}) where T <: BlasFloat +LinearAlgebra.ldlt(A::Symmetric{T,<:CuSparseMatrixCSR{T}) where T <: BlasReal = LinearAlgebra.ldlt(A.data, view=A.uplo) +LinearAlgebra.ldlt(A::Hermitian{T,<:CuSparseMatrixCSR{T}) where T <: BlasFloat = LinearAlgebra.ldlt(A.data, view=A.uplo) + +function LinearAlgebra.cholesky(A::CuSparseMatrixCSR{T}; view::Char='F') where T <: BlasFloat n = LinearAlgebra.checksquare(A) structure = T <: Real ? "SPD" : "HPD" - solver = CudssSolver(A, structure, 'F') + solver = CudssSolver(A, structure, view) x = CudssMatrix(T, n) b = CudssMatrix(T, n) cudss("analysis", solver, x, b) @@ -31,6 +34,9 @@ function LinearAlgebra.cholesky(A::CuSparseMatrixCSR{T}) where T <: BlasFloat return solver end +LinearAlgebra.cholesky(A::Symmetric{T,<:CuSparseMatrixCSR{T}) where T <: BlasReal = LinearAlgebra.cholesky(A.data, view=A.uplo) +LinearAlgebra.cholesky(A::Hermitian{T,<:CuSparseMatrixCSR{T}) where T <: BlasFloat = LinearAlgebra.cholesky(A.data, view=A.uplo) + for fun in (:lu!, :ldlt!, :cholesky!) @eval begin function LinearAlgebra.$fun(solver::CudssSolver{T}, A::CuSparseMatrixCSR{T}) where T <: BlasFloat diff --git a/test/test_cudss.jl b/test/test_cudss.jl index 1836961..6227127 100644 --- a/test/test_cudss.jl +++ b/test/test_cudss.jl @@ -218,27 +218,32 @@ function cudss_generic() @test norm(r_gpu2) ≤ √eps(R) end - @testset "view = $view" for view in ('F',) + @testset "view = $view" for view in ('L', 'U', 'F') @testset "Symmetric -- Hermitian" begin A_cpu = sprand(T, n, n, 0.01) + I A_cpu = A_cpu + A_cpu' X_cpu = zeros(T, n, p) B_cpu = rand(T, n, p) - A_gpu = CuSparseMatrixCSR(A_cpu) + (view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril)) + (view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu)) + (view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu)) X_gpu = CuMatrix(X_cpu) B_gpu = CuMatrix(B_cpu) - solver = ldlt(A_gpu) + solver = ldlt(A_gpu; view) ldiv!(X_gpu, solver, B_gpu) R_gpu = B_gpu - A_gpu * X_gpu @test norm(R_gpu) ≤ √eps(R) - A_gpu2 = rand(R) * A_gpu + c = rand(R) + A_cpu2 = c * A_cpu + A_gpu2 = c * A_gpu + ldlt!(solver, A_gpu2) X_gpu .= B_gpu ldiv!(solver, X_gpu) - R_gpu2 = B_gpu - A_gpu2 * X_gpu + R_gpu2 = B_gpu - CuSparseMatrixCSR(A_cpu2) * X_gpu @test norm(R_gpu2) ≤ √eps(R) end @@ -248,20 +253,25 @@ function cudss_generic() X_cpu = zeros(T, n, p) B_cpu = rand(T, n, p) - A_gpu = CuSparseMatrixCSR(A_cpu) + (view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril)) + (view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu)) + (view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu)) X_gpu = CuMatrix(X_cpu) B_gpu = CuMatrix(B_cpu) - solver = cholesky(A_gpu) + solver = cholesky(A_gpu; view) ldiv!(X_gpu, solver, B_gpu) R_gpu = B_gpu - A_gpu * X_gpu @test norm(R_gpu) ≤ √eps(R) - A_gpu2 = rand(R) * A_gpu + c = rand(R) + A_cpu2 = c * A_cpu + A_gpu2 = c * A_gpu + cholesky!(solver, A_gpu2) X_gpu .= B_gpu ldiv!(solver, X_gpu) - R_gpu2 = B_gpu - A_gpu2 * X_gpu + R_gpu2 = B_gpu - CuSparseMatrixCSR(A_cpu2) * X_gpu @test norm(R_gpu2) ≤ √eps(R) end end