diff --git a/README.md b/README.md index 4a825e1..9f0baa3 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,20 @@ cudss("solve", solver, x_gpu, b_gpu) r_gpu = b_gpu - A_gpu * x_gpu norm(r_gpu) + +# In-place LU +d_gpu = rand(T, n) |> CuVector +A_gpu = A_gpu + Diagonal(d_gpu) +cudss_set(solver, A_gpu) + +c_cpu = rand(T, n) +c_gpu = CuVector(c_cpu) + +cudss("factorization", solver, x_gpu, c_gpu) +cudss("solve", solver, x_gpu, c_gpu) + +r_gpu = c_gpu - A_gpu * x_gpu +norm(r_gpu) ``` ### Example 2: Sparse symmetric linear system with multiple right-hand sides @@ -64,6 +78,7 @@ using CUDSS using SparseArrays, LinearAlgebra T = Float64 +R = real(T) n = 100 p = 5 A_cpu = sprand(T, n, n, 0.05) + I @@ -84,6 +99,20 @@ cudss("solve", solver, X_gpu, B_gpu) R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu norm(R_gpu) + +# In-place LDLᵀ +d_gpu = rand(R, n) |> CuVector +A_gpu = A_gpu + Diagonal(d_gpu) +cudss_set(solver, A_gpu) + +C_cpu = rand(T, n, p) +C_gpu = CuMatrix(C_cpu) + +cudss("factorization", solver, X_gpu, C_gpu) +cudss("solve", solver, X_gpu, C_gpu) + +R_gpu = C_gpu - ( CuSparseMatrixCSR(A_cpu) + Diagonal(d_gpu) ) * X_gpu +norm(R_gpu) ``` ### Example 3: Sparse hermitian positive definite linear system with multiple right-hand sides @@ -94,6 +123,7 @@ using CUDSS using SparseArrays, LinearAlgebra T = ComplexF64 +R = real(T) n = 100 p = 5 A_cpu = sprand(T, n, n, 0.01) @@ -114,4 +144,18 @@ cudss("solve", solver, X_gpu, B_gpu) R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu norm(R_gpu) + +# In-place LLᴴ +d_gpu = rand(R, n) |> CuVector +A_gpu = A_gpu + Diagonal(d_gpu) +cudss_set(solver, A_gpu) + +C_cpu = rand(T, n, p) +C_gpu = CuMatrix(C_cpu) + +cudss("factorization", solver, X_gpu, C_gpu) +cudss("solve", solver, X_gpu, C_gpu) + +R_gpu = C_gpu - ( CuSparseMatrixCSR(A_cpu) + Diagonal(d_gpu) ) * X_gpu +norm(R_gpu) ``` diff --git a/src/interfaces.jl b/src/interfaces.jl index 681ce93..36b4e6a 100644 --- a/src/interfaces.jl +++ b/src/interfaces.jl @@ -48,6 +48,7 @@ end cudss_set(matrix::CudssMatrix{T}, v::CuVector{T}) cudss_set(matrix::CudssMatrix{T}, A::CuMatrix{T}) cudss_set(matrix::CudssMatrix{T}, A::CuSparseMatrixCSR{T,Cint}) + cudss_set(solver::CudssSolver{T}, A::CuSparseMatrixCSR{T,Cint}) cudss_set(solver::CudssSolver, parameter::String, value) cudss_set(config::CudssConfig, parameter::String, value) cudss_set(data::CudssData, parameter::String, value) @@ -84,6 +85,10 @@ function cudss_set(matrix::CudssMatrix{T}, A::CuSparseMatrixCSR{T,Cint}) where T cudssMatrixSetCsrPointers(matrix, A.rowPtr, CU_NULL, A.colVal, A.nzVal) end +function cudss_set(solver::CudssSolver{T}, A::CuSparseMatrixCSR{T,Cint}) where T <: BlasFloat + cudss_set(solver.matrix, A) +end + function cudss_set(solver::CudssSolver, parameter::String, value) if parameter ∈ CUDSS_CONFIG_PARAMETERS cudss_set(solver.config, parameter, value) diff --git a/test/test_cudss.jl b/test/test_cudss.jl index dfbf6c2..d1a0fd4 100644 --- a/test/test_cudss.jl +++ b/test/test_cudss.jl @@ -140,6 +140,20 @@ function cudss_execution() r_gpu = b_gpu - A_gpu * x_gpu @test norm(r_gpu) ≤ √eps(R) + + # In-place LU + d_gpu = rand(T, n) |> CuVector + A_gpu = A_gpu + Diagonal(d_gpu) + cudss_set(solver, A_gpu) + + c_cpu = rand(T, n) + c_gpu = CuVector(c_cpu) + + cudss("factorization", solver, x_gpu, c_gpu) + cudss("solve", solver, x_gpu, c_gpu) + + r_gpu = c_gpu - A_gpu * x_gpu + @test norm(r_gpu) ≤ √eps(R) end end @@ -171,6 +185,20 @@ function cudss_execution() R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu @test norm(R_gpu) ≤ √eps(R) + + # In-place LDLᵀ / LDLᴴ + d_gpu = rand(R, n) |> CuVector + A_gpu = A_gpu + Diagonal(d_gpu) + cudss_set(solver, A_gpu) + + C_cpu = rand(T, n, p) + C_gpu = CuMatrix(C_cpu) + + cudss("factorization", solver, X_gpu, C_gpu) + cudss("solve", solver, X_gpu, C_gpu) + + R_gpu = C_gpu - ( CuSparseMatrixCSR(A_cpu) + Diagonal(d_gpu) ) * X_gpu + @test norm(R_gpu) ≤ √eps(R) end end end @@ -202,6 +230,20 @@ function cudss_execution() R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu @test norm(R_gpu) ≤ √eps(R) + + # In-place LLᵀ / LLᴴ + d_gpu = rand(R, n) |> CuVector + A_gpu = A_gpu + Diagonal(d_gpu) + cudss_set(solver, A_gpu) + + C_cpu = rand(T, n, p) + C_gpu = CuMatrix(C_cpu) + + cudss("factorization", solver, X_gpu, C_gpu) + cudss("solve", solver, X_gpu, C_gpu) + + R_gpu = C_gpu - ( CuSparseMatrixCSR(A_cpu) + Diagonal(d_gpu) ) * X_gpu + @test norm(R_gpu) ≤ √eps(R) end end end