diff --git a/Project.toml b/Project.toml index f536d45..4f3e5ec 100644 --- a/Project.toml +++ b/Project.toml @@ -17,10 +17,12 @@ CUDSS_jll = "0.1.0" julia = "1.6" LinearAlgebra = "1.6" SparseArrays = "1.6" +Random = "1.6" Test = "1.6" [extras] +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test"] +test = ["Random", "Test"] diff --git a/docs/src/generic.md b/docs/src/generic.md index b9690e6..7cabfc9 100644 --- a/docs/src/generic.md +++ b/docs/src/generic.md @@ -21,6 +21,18 @@ x_gpu = F \ b_gpu r_gpu = b_gpu - A_gpu * x_gpu norm(r_gpu) + +# In-place LU +d_gpu = rand(T, n) |> CuVector +B_gpu = A_gpu + Diagonal(d_gpu) +lu!(F, B_gpu) + +c_cpu = rand(T, n) +c_gpu = CuVector(c_cpu) +ldiv!(x_gpu, F, c_gpu) + +r_gpu = c_gpu - B_gpu * x_gpu +norm(r_gpu) ``` ## Example 2: Sparse symmetric linear system with multiple right-hand sides @@ -32,6 +44,7 @@ using LinearAlgebra using SparseArrays T = Float64 +R = real(T) n = 100 p = 5 A_cpu = sprand(T, n, n, 0.05) + I @@ -43,14 +56,26 @@ B_gpu = CuMatrix(B_cpu) X_gpu = similar(B_gpu) F = ldlt(A_gpu, view='L') -ldiv!(X_gpu, F, B_gpu) +X_gpu = F \ B_gpu R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu norm(R_gpu) + +# In-place LDLᵀ +d_gpu = rand(R, n) |> CuVector +B_gpu = A_gpu + Diagonal(d_gpu) +ldlt!(F, B_gpu) + +C_cpu = rand(T, n, p) +C_gpu = CuMatrix(C_cpu) +ldiv!(X_gpu, F, C_gpu) + +R_gpu = C_gpu - ( CuSparseMatrixCSR(A_cpu) + Diagonal(d_gpu) ) * X_gpu +norm(R_gpu) ``` !!! note - If we only store one triangle of `A_gpu`, we can also use the wrappers `Symmetric` and `Hermitian`. For real matrices, both wrappers are allowed but only `Hermitian` can be used for complex matrices. + If we only store one triangle of `A_gpu`, we can also use the wrappers `Symmetric` and `Hermitian` instead of using the keyword argument `view` in `ldlt`. For real matrices, both wrappers are allowed but only `Hermitian` can be used for complex matrices. ```julia S_gpu = Symmetric(A_gpu, :L) @@ -66,6 +91,7 @@ using LinearAlgebra using SparseArrays T = ComplexF64 +R = real(T) n = 100 p = 5 A_cpu = sprand(T, n, n, 0.01) @@ -77,14 +103,26 @@ B_gpu = CuMatrix(B_cpu) X_gpu = similar(B_gpu) F = cholesky(A_gpu, view='U') -ldiv!(X_gpu, F, B_gpu) +X_gpu = F \ B_gpu R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu norm(R_gpu) + +# In-place LLᴴ +d_gpu = rand(R, n) |> CuVector +B_gpu = A_gpu + Diagonal(d_gpu) +cholesky!(F, B_gpu) + +C_cpu = rand(T, n, p) +C_gpu = CuMatrix(C_cpu) +ldiv!(X_gpu, F, C_gpu) + +R_gpu = C_gpu - ( CuSparseMatrixCSR(A_cpu) + Diagonal(d_gpu) ) * X_gpu +norm(R_gpu) ``` !!! note - If we only store one triangle of `A_gpu`, we can also use the wrappers `Symmetric` and `Hermitian`. For real matrices, both wrappers are allowed but only `Hermitian` can be used for complex matrices. + If we only store one triangle of `A_gpu`, we can also use the wrappers `Symmetric` and `Hermitian` instead of using the keyword argument `view` in `cholesky`. For real matrices, both wrappers are allowed but only `Hermitian` can be used for complex matrices. ```julia H_gpu = Hermitian(A_gpu, :U) diff --git a/src/generic.jl b/src/generic.jl index 8b11455..0dc5386 100644 --- a/src/generic.jl +++ b/src/generic.jl @@ -1,4 +1,4 @@ -function LinearAlgebra.lu(A::CuSparseMatrixCSR{T}) where T <: BlasFloat +function LinearAlgebra.lu(A::CuSparseMatrixCSR{T,Cint}) where T <: BlasFloat n = LinearAlgebra.checksquare(A) solver = CudssSolver(A, "G", 'F') x = CudssMatrix(T, n) @@ -8,7 +8,7 @@ function LinearAlgebra.lu(A::CuSparseMatrixCSR{T}) where T <: BlasFloat return solver end -function LinearAlgebra.ldlt(A::CuSparseMatrixCSR{T}; view::Char='F') where T <: BlasFloat +function LinearAlgebra.ldlt(A::CuSparseMatrixCSR{T,Cint}; view::Char='F') where T <: BlasFloat n = LinearAlgebra.checksquare(A) structure = T <: Real ? "S" : "H" solver = CudssSolver(A, structure, view) @@ -20,10 +20,10 @@ function LinearAlgebra.ldlt(A::CuSparseMatrixCSR{T}; view::Char='F') where T <: return solver end -LinearAlgebra.ldlt(A::Symmetric{T,<:CuSparseMatrixCSR{T}}) where T <: BlasReal = LinearAlgebra.ldlt(A.data, view=A.uplo) -LinearAlgebra.ldlt(A::Hermitian{T,<:CuSparseMatrixCSR{T}}) where T <: BlasFloat = LinearAlgebra.ldlt(A.data, view=A.uplo) +LinearAlgebra.ldlt(A::Symmetric{T,<:CuSparseMatrixCSR{T,Cint}}) where T <: BlasReal = LinearAlgebra.ldlt(A.data, view=A.uplo) +LinearAlgebra.ldlt(A::Hermitian{T,<:CuSparseMatrixCSR{T,Cint}}) where T <: BlasFloat = LinearAlgebra.ldlt(A.data, view=A.uplo) -function LinearAlgebra.cholesky(A::CuSparseMatrixCSR{T}; view::Char='F') where T <: BlasFloat +function LinearAlgebra.cholesky(A::CuSparseMatrixCSR{T,Cint}; view::Char='F') where T <: BlasFloat n = LinearAlgebra.checksquare(A) structure = T <: Real ? "SPD" : "HPD" solver = CudssSolver(A, structure, view) @@ -34,12 +34,12 @@ function LinearAlgebra.cholesky(A::CuSparseMatrixCSR{T}; view::Char='F') where T return solver end -LinearAlgebra.cholesky(A::Symmetric{T,<:CuSparseMatrixCSR{T}}) where T <: BlasReal = LinearAlgebra.cholesky(A.data, view=A.uplo) -LinearAlgebra.cholesky(A::Hermitian{T,<:CuSparseMatrixCSR{T}}) where T <: BlasFloat = LinearAlgebra.cholesky(A.data, view=A.uplo) +LinearAlgebra.cholesky(A::Symmetric{T,<:CuSparseMatrixCSR{T,Cint}}) where T <: BlasReal = LinearAlgebra.cholesky(A.data, view=A.uplo) +LinearAlgebra.cholesky(A::Hermitian{T,<:CuSparseMatrixCSR{T,Cint}}) where T <: BlasFloat = LinearAlgebra.cholesky(A.data, view=A.uplo) for fun in (:lu!, :ldlt!, :cholesky!) @eval begin - function LinearAlgebra.$fun(solver::CudssSolver{T}, A::CuSparseMatrixCSR{T}) where T <: BlasFloat + function LinearAlgebra.$fun(solver::CudssSolver{T}, A::CuSparseMatrixCSR{T,Cint}) where T <: BlasFloat n = LinearAlgebra.checksquare(A) cudss_set(solver.matrix, A) x = CudssMatrix(T, n) diff --git a/src/helpers.jl b/src/helpers.jl index a44cd7d..142c49c 100644 --- a/src/helpers.jl +++ b/src/helpers.jl @@ -6,7 +6,7 @@ export CudssMatrix, CudssData, CudssConfig """ matrix = CudssMatrix(v::CuVector{T}) matrix = CudssMatrix(A::CuMatrix{T}) - matrix = CudssMatrix(A::CuSparseMatrixCSR{T}, struture::String, view::Char; index::Char='O') + matrix = CudssMatrix(A::CuSparseMatrixCSR{T,Cint}, struture::String, view::Char; index::Char='O') The type `T` can be `Float32`, `Float64`, `ComplexF32` or `ComplexF64`. @@ -75,7 +75,7 @@ mutable struct CudssMatrix{T} obj end - function CudssMatrix(A::CuSparseMatrixCSR{T}, structure::String, view::Char; index::Char='O') where T <: BlasFloat + function CudssMatrix(A::CuSparseMatrixCSR{T,Cint}, structure::String, view::Char; index::Char='O') where T <: BlasFloat m,n = size(A) matrix_ref = Ref{cudssMatrix_t}() cudssMatrixCreateCsr(matrix_ref, m, n, nnz(A), A.rowPtr, CU_NULL, diff --git a/src/interfaces.jl b/src/interfaces.jl index 159823d..2616e3c 100644 --- a/src/interfaces.jl +++ b/src/interfaces.jl @@ -1,7 +1,7 @@ export CudssSolver, cudss, cudss_set, cudss_get """ - solver = CudssSolver(A::CuSparseMatrixCSR{T}, structure::String, view::Char; index::Char='O') + solver = CudssSolver(A::CuSparseMatrixCSR{T,Cint}, structure::String, view::Char; index::Char='O') solver = CudssSolver(matrix::CudssMatrix{T}, config::CudssConfig, data::CudssData) The type `T` can be `Float32`, `Float64`, `ComplexF32` or `ComplexF64`. @@ -36,7 +36,7 @@ mutable struct CudssSolver{T} return new{T}(matrix, config, data) end - function CudssSolver(A::CuSparseMatrixCSR{T}, structure::String, view::Char; index::Char='O') where T <: BlasFloat + function CudssSolver(A::CuSparseMatrixCSR{T,Cint}, structure::String, view::Char; index::Char='O') where T <: BlasFloat matrix = CudssMatrix(A, structure, view; index) config = CudssConfig() data = CudssData() @@ -47,7 +47,7 @@ end """ cudss_set(matrix::CudssMatrix{T}, v::CuVector{T}) cudss_set(matrix::CudssMatrix{T}, A::CuMatrix{T}) - cudss_set(matrix::CudssMatrix{T}, A::CuSparseMatrixCSR{T}) + cudss_set(matrix::CudssMatrix{T}, A::CuSparseMatrixCSR{T,Cint}) cudss_set(data::CudssSolver, param::String, value) cudss_set(config::CudssConfig, param::String, value) cudss_set(data::CudssData, param::String, value) @@ -80,7 +80,7 @@ function cudss_set(matrix::CudssMatrix{T}, A::CuMatrix{T}) where T <: BlasFloat cudssMatrixSetValues(matrix, A) end -function cudss_set(matrix::CudssMatrix{T}, A::CuSparseMatrixCSR{T}) where T <: BlasFloat +function cudss_set(matrix::CudssMatrix{T}, A::CuSparseMatrixCSR{T,Cint}) where T <: BlasFloat cudssMatrixSetCsrPointers(matrix, A.rowPtr, CU_NULL, A.colVal, A.nzVal) end diff --git a/test/runtests.jl b/test/runtests.jl index ff9717f..2a3a653 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,4 @@ -using Test +using Test, Random using CUDA, CUDA.CUSPARSE using CUDSS using SparseArrays @@ -9,6 +9,8 @@ import CUDSS: CUDSS_ALG_DEFAULT, CUDSS_ALG_1, CUDSS_ALG_2, CUDSS_ALG_3 @info("CUDSS_INSTALLATION : $(CUDSS.CUDSS_INSTALLATION)") +Random.seed!(666) # Random tests are diabolical + include("test_cudss.jl") @testset "CUDSS" begin diff --git a/test/test_cudss.jl b/test/test_cudss.jl index 528767c..c9dd184 100644 --- a/test/test_cudss.jl +++ b/test/test_cudss.jl @@ -113,79 +113,90 @@ function cudss_execution() @testset "precision = $T" for T in (Float32, Float64, ComplexF32, ComplexF64) R = real(T) @testset "Unsymmetric -- Non-Hermitian" begin - A_cpu = sprand(T, n, n, 0.02) + I - x_cpu = zeros(T, n) - b_cpu = rand(T, n) + @testset "Pivoting = $pivot" for pivot in ('C', 'R', 'N') + A_cpu = sprand(T, n, n, 0.02) + I + x_cpu = zeros(T, n) + b_cpu = rand(T, n) - A_gpu = CuSparseMatrixCSR(A_cpu) - x_gpu = CuVector(x_cpu) - b_gpu = CuVector(b_cpu) + A_gpu = CuSparseMatrixCSR(A_cpu) + x_gpu = CuVector(x_cpu) + b_gpu = CuVector(b_cpu) - matrix = CudssMatrix(A_gpu, "G", 'F') - config = CudssConfig() - data = CudssData() - solver = CudssSolver(matrix, config, data) + matrix = CudssMatrix(A_gpu, "G", 'F') + config = CudssConfig() + data = CudssData() + solver = CudssSolver(matrix, config, data) + cudss_set(solver, "pivot_type", pivot) - cudss("analysis", solver, x_gpu, b_gpu) - cudss("factorization", solver, x_gpu, b_gpu) - cudss("solve", solver, x_gpu, b_gpu) + cudss("analysis", solver, x_gpu, b_gpu) + cudss("factorization", solver, x_gpu, b_gpu) + cudss("solve", solver, x_gpu, b_gpu) - r_gpu = b_gpu - A_gpu * x_gpu - @test norm(r_gpu) ≤ √eps(R) + r_gpu = b_gpu - A_gpu * x_gpu + @test norm(r_gpu) ≤ √eps(R) + end end - @testset "view = $view" for view in ('L', 'U', 'F') - @testset "Symmetric -- Hermitian" begin - A_cpu = sprand(T, n, n, 0.01) + I - A_cpu = A_cpu + A_cpu' - X_cpu = zeros(T, n, p) - B_cpu = rand(T, n, p) + symmetric_hermitian_pivots = T <: Real ? ('C', 'R', 'N') : ('N',) + @testset "Symmetric -- Hermitian" begin + @testset "view = $view" for view in ('F',) + @testset "Pivoting = $pivot" for pivot in symmetric_hermitian_pivots + A_cpu = sprand(T, n, n, 0.01) + I + A_cpu = A_cpu + A_cpu' + X_cpu = zeros(T, n, p) + B_cpu = rand(T, n, p) - (view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril)) - (view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu)) - (view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu)) - X_gpu = CuMatrix(X_cpu) - B_gpu = CuMatrix(B_cpu) + (view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril)) + (view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu)) + (view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu)) + X_gpu = CuMatrix(X_cpu) + B_gpu = CuMatrix(B_cpu) - structure = T <: Real ? "S" : "H" - matrix = CudssMatrix(A_gpu, structure, view) - config = CudssConfig() - data = CudssData() - solver = CudssSolver(matrix, config, data) - (structure == "H") && cudss_set(solver, "pivot_type", 'N') + structure = T <: Real ? "S" : "H" + matrix = CudssMatrix(A_gpu, structure, view) + config = CudssConfig() + data = CudssData() + solver = CudssSolver(matrix, config, data) + cudss_set(solver, "pivot_type", pivot) - cudss("analysis", solver, X_gpu, B_gpu) - cudss("factorization", solver, X_gpu, B_gpu) - cudss("solve", solver, X_gpu, B_gpu) + cudss("analysis", solver, X_gpu, B_gpu) + cudss("factorization", solver, X_gpu, B_gpu) + cudss("solve", solver, X_gpu, B_gpu) - R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu - @test norm(R_gpu) ≤ √eps(R) + R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu + @test norm(R_gpu) ≤ √eps(R) + end end + end - @testset "SPD -- HPD" begin - A_cpu = sprand(T, n, n, 0.01) - A_cpu = A_cpu * A_cpu' + I - X_cpu = zeros(T, n, p) - B_cpu = rand(T, n, p) + @testset "SPD -- HPD" begin + @testset "view = $view" for view in ('F',) + @testset "Pivoting = $pivot" for pivot in ('C', 'R', 'N') + A_cpu = sprand(T, n, n, 0.01) + A_cpu = A_cpu * A_cpu' + I + X_cpu = zeros(T, n, p) + B_cpu = rand(T, n, p) - (view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril)) - (view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu)) - (view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu)) - X_gpu = CuMatrix(X_cpu) - B_gpu = CuMatrix(B_cpu) + (view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril)) + (view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu)) + (view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu)) + X_gpu = CuMatrix(X_cpu) + B_gpu = CuMatrix(B_cpu) - structure = T <: Real ? "SPD" : "HPD" - matrix = CudssMatrix(A_gpu, structure, view) - config = CudssConfig() - data = CudssData() - solver = CudssSolver(matrix, config, data) + structure = T <: Real ? "SPD" : "HPD" + matrix = CudssMatrix(A_gpu, structure, view) + config = CudssConfig() + data = CudssData() + solver = CudssSolver(matrix, config, data) + cudss_set(solver, "pivot_type", pivot) - cudss("analysis", solver, X_gpu, B_gpu) - cudss("factorization", solver, X_gpu, B_gpu) - cudss("solve", solver, X_gpu, B_gpu) + cudss("analysis", solver, X_gpu, B_gpu) + cudss("factorization", solver, X_gpu, B_gpu) + cudss("solve", solver, X_gpu, B_gpu) - R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu - @test norm(R_gpu) ≤ √eps(R) + R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu + @test norm(R_gpu) ≤ √eps(R) + end end end end @@ -234,8 +245,8 @@ function cudss_generic() end end - @testset "view = $view" for view in ('F',) - @testset "Symmetric -- Hermitian" begin + @testset "Symmetric -- Hermitian" begin + @testset "view = $view" for view in ('F',) A_cpu = sprand(T, n, n, 0.01) + I A_cpu = A_cpu + A_cpu' B_cpu = rand(T, n, p) @@ -281,8 +292,10 @@ function cudss_generic() @test norm(R_gpu2) ≤ √eps(R) end end + end - @testset "SPD -- HPD" begin + @testset "SPD -- HPD" begin + @testset "view = $view" for view in ('F',) A_cpu = sprand(T, n, n, 0.01) A_cpu = A_cpu * A_cpu' + I B_cpu = rand(T, n, p)