Skip to content

Commit

Permalink
Update documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
amontoison committed Mar 11, 2024
1 parent 0d13a5d commit 159dd11
Show file tree
Hide file tree
Showing 7 changed files with 135 additions and 80 deletions.
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ CUDSS_jll = "0.1.0"
julia = "1.6"
LinearAlgebra = "1.6"
SparseArrays = "1.6"
Random = "1.6"
Test = "1.6"

[extras]
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test"]
test = ["Random", "Test"]
46 changes: 42 additions & 4 deletions docs/src/generic.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,18 @@ x_gpu = F \ b_gpu

r_gpu = b_gpu - A_gpu * x_gpu
norm(r_gpu)

# In-place LU
d_gpu = rand(T, n) |> CuVector
B_gpu = A_gpu + Diagonal(d_gpu)
lu!(F, B_gpu)

c_cpu = rand(T, n)
c_gpu = CuVector(c_cpu)
ldiv!(x_gpu, F, c_gpu)

r_gpu = c_gpu - B_gpu * x_gpu
norm(r_gpu)
```

## Example 2: Sparse symmetric linear system with multiple right-hand sides
Expand All @@ -32,6 +44,7 @@ using LinearAlgebra
using SparseArrays

T = Float64
R = real(T)
n = 100
p = 5
A_cpu = sprand(T, n, n, 0.05) + I
Expand All @@ -43,14 +56,26 @@ B_gpu = CuMatrix(B_cpu)
X_gpu = similar(B_gpu)

F = ldlt(A_gpu, view='L')
ldiv!(X_gpu, F, B_gpu)
X_gpu = F \ B_gpu

R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu
norm(R_gpu)

# In-place LDLᵀ
d_gpu = rand(R, n) |> CuVector
B_gpu = A_gpu + Diagonal(d_gpu)
ldlt!(F, B_gpu)

C_cpu = rand(T, n, p)
C_gpu = CuMatrix(C_cpu)
ldiv!(X_gpu, F, C_gpu)

R_gpu = C_gpu - ( CuSparseMatrixCSR(A_cpu) + Diagonal(d_gpu) ) * X_gpu
norm(R_gpu)
```

!!! note
If we only store one triangle of `A_gpu`, we can also use the wrappers `Symmetric` and `Hermitian`. For real matrices, both wrappers are allowed but only `Hermitian` can be used for complex matrices.
If we only store one triangle of `A_gpu`, we can also use the wrappers `Symmetric` and `Hermitian` instead of using the keyword argument `view` in `ldlt`. For real matrices, both wrappers are allowed but only `Hermitian` can be used for complex matrices.

```julia
S_gpu = Symmetric(A_gpu, :L)
Expand All @@ -66,6 +91,7 @@ using LinearAlgebra
using SparseArrays

T = ComplexF64
R = real(T)
n = 100
p = 5
A_cpu = sprand(T, n, n, 0.01)
Expand All @@ -77,14 +103,26 @@ B_gpu = CuMatrix(B_cpu)
X_gpu = similar(B_gpu)

F = cholesky(A_gpu, view='U')
ldiv!(X_gpu, F, B_gpu)
X_gpu = F \ B_gpu

R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu
norm(R_gpu)

# In-place LLᴴ
d_gpu = rand(R, n) |> CuVector
B_gpu = A_gpu + Diagonal(d_gpu)
cholesky!(F, B_gpu)

C_cpu = rand(T, n, p)
C_gpu = CuMatrix(C_cpu)
ldiv!(X_gpu, F, C_gpu)

R_gpu = C_gpu - ( CuSparseMatrixCSR(A_cpu) + Diagonal(d_gpu) ) * X_gpu
norm(R_gpu)
```

!!! note
If we only store one triangle of `A_gpu`, we can also use the wrappers `Symmetric` and `Hermitian`. For real matrices, both wrappers are allowed but only `Hermitian` can be used for complex matrices.
If we only store one triangle of `A_gpu`, we can also use the wrappers `Symmetric` and `Hermitian` instead of using the keyword argument `view` in `cholesky`. For real matrices, both wrappers are allowed but only `Hermitian` can be used for complex matrices.

```julia
H_gpu = Hermitian(A_gpu, :U)
Expand Down
16 changes: 8 additions & 8 deletions src/generic.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
function LinearAlgebra.lu(A::CuSparseMatrixCSR{T}) where T <: BlasFloat
function LinearAlgebra.lu(A::CuSparseMatrixCSR{T,Cint}) where T <: BlasFloat
n = LinearAlgebra.checksquare(A)
solver = CudssSolver(A, "G", 'F')
x = CudssMatrix(T, n)
Expand All @@ -8,7 +8,7 @@ function LinearAlgebra.lu(A::CuSparseMatrixCSR{T}) where T <: BlasFloat
return solver
end

function LinearAlgebra.ldlt(A::CuSparseMatrixCSR{T}; view::Char='F') where T <: BlasFloat
function LinearAlgebra.ldlt(A::CuSparseMatrixCSR{T,Cint}; view::Char='F') where T <: BlasFloat
n = LinearAlgebra.checksquare(A)
structure = T <: Real ? "S" : "H"
solver = CudssSolver(A, structure, view)
Expand All @@ -20,10 +20,10 @@ function LinearAlgebra.ldlt(A::CuSparseMatrixCSR{T}; view::Char='F') where T <:
return solver
end

LinearAlgebra.ldlt(A::Symmetric{T,<:CuSparseMatrixCSR{T}}) where T <: BlasReal = LinearAlgebra.ldlt(A.data, view=A.uplo)
LinearAlgebra.ldlt(A::Hermitian{T,<:CuSparseMatrixCSR{T}}) where T <: BlasFloat = LinearAlgebra.ldlt(A.data, view=A.uplo)
# Convenience wrappers: forward a `Symmetric`/`Hermitian`-wrapped CSR matrix to the
# keyword-based `ldlt`, reusing the wrapper's stored triangle (`A.uplo`) as the view.
# `Symmetric` is restricted to real element types; `Hermitian` accepts real and complex.
function LinearAlgebra.ldlt(A::Symmetric{T,<:CuSparseMatrixCSR{T,Cint}}) where T <: BlasReal
    return LinearAlgebra.ldlt(A.data, view=A.uplo)
end

function LinearAlgebra.ldlt(A::Hermitian{T,<:CuSparseMatrixCSR{T,Cint}}) where T <: BlasFloat
    return LinearAlgebra.ldlt(A.data, view=A.uplo)
end

function LinearAlgebra.cholesky(A::CuSparseMatrixCSR{T}; view::Char='F') where T <: BlasFloat
function LinearAlgebra.cholesky(A::CuSparseMatrixCSR{T,Cint}; view::Char='F') where T <: BlasFloat
n = LinearAlgebra.checksquare(A)
structure = T <: Real ? "SPD" : "HPD"
solver = CudssSolver(A, structure, view)
Expand All @@ -34,12 +34,12 @@ function LinearAlgebra.cholesky(A::CuSparseMatrixCSR{T}; view::Char='F') where T
return solver
end

LinearAlgebra.cholesky(A::Symmetric{T,<:CuSparseMatrixCSR{T}}) where T <: BlasReal = LinearAlgebra.cholesky(A.data, view=A.uplo)
LinearAlgebra.cholesky(A::Hermitian{T,<:CuSparseMatrixCSR{T}}) where T <: BlasFloat = LinearAlgebra.cholesky(A.data, view=A.uplo)
# Convenience wrappers: forward a `Symmetric`/`Hermitian`-wrapped CSR matrix to the
# keyword-based `cholesky`, reusing the wrapper's stored triangle (`A.uplo`) as the view.
# `Symmetric` is restricted to real element types; `Hermitian` accepts real and complex.
function LinearAlgebra.cholesky(A::Symmetric{T,<:CuSparseMatrixCSR{T,Cint}}) where T <: BlasReal
    return LinearAlgebra.cholesky(A.data, view=A.uplo)
end

function LinearAlgebra.cholesky(A::Hermitian{T,<:CuSparseMatrixCSR{T,Cint}}) where T <: BlasFloat
    return LinearAlgebra.cholesky(A.data, view=A.uplo)
end

for fun in (:lu!, :ldlt!, :cholesky!)
@eval begin
function LinearAlgebra.$fun(solver::CudssSolver{T}, A::CuSparseMatrixCSR{T}) where T <: BlasFloat
function LinearAlgebra.$fun(solver::CudssSolver{T}, A::CuSparseMatrixCSR{T,Cint}) where T <: BlasFloat
n = LinearAlgebra.checksquare(A)
cudss_set(solver.matrix, A)
x = CudssMatrix(T, n)
Expand Down
4 changes: 2 additions & 2 deletions src/helpers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ export CudssMatrix, CudssData, CudssConfig
"""
matrix = CudssMatrix(v::CuVector{T})
matrix = CudssMatrix(A::CuMatrix{T})
matrix = CudssMatrix(A::CuSparseMatrixCSR{T}, structure::String, view::Char; index::Char='O')
matrix = CudssMatrix(A::CuSparseMatrixCSR{T,Cint}, structure::String, view::Char; index::Char='O')
The type `T` can be `Float32`, `Float64`, `ComplexF32` or `ComplexF64`.
Expand Down Expand Up @@ -75,7 +75,7 @@ mutable struct CudssMatrix{T}
obj
end

function CudssMatrix(A::CuSparseMatrixCSR{T}, structure::String, view::Char; index::Char='O') where T <: BlasFloat
function CudssMatrix(A::CuSparseMatrixCSR{T,Cint}, structure::String, view::Char; index::Char='O') where T <: BlasFloat
m,n = size(A)
matrix_ref = Ref{cudssMatrix_t}()
cudssMatrixCreateCsr(matrix_ref, m, n, nnz(A), A.rowPtr, CU_NULL,
Expand Down
8 changes: 4 additions & 4 deletions src/interfaces.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
export CudssSolver, cudss, cudss_set, cudss_get

"""
solver = CudssSolver(A::CuSparseMatrixCSR{T}, structure::String, view::Char; index::Char='O')
solver = CudssSolver(A::CuSparseMatrixCSR{T,Cint}, structure::String, view::Char; index::Char='O')
solver = CudssSolver(matrix::CudssMatrix{T}, config::CudssConfig, data::CudssData)
The type `T` can be `Float32`, `Float64`, `ComplexF32` or `ComplexF64`.
Expand Down Expand Up @@ -36,7 +36,7 @@ mutable struct CudssSolver{T}
return new{T}(matrix, config, data)
end

function CudssSolver(A::CuSparseMatrixCSR{T}, structure::String, view::Char; index::Char='O') where T <: BlasFloat
function CudssSolver(A::CuSparseMatrixCSR{T,Cint}, structure::String, view::Char; index::Char='O') where T <: BlasFloat
matrix = CudssMatrix(A, structure, view; index)
config = CudssConfig()
data = CudssData()
Expand All @@ -47,7 +47,7 @@ end
"""
cudss_set(matrix::CudssMatrix{T}, v::CuVector{T})
cudss_set(matrix::CudssMatrix{T}, A::CuMatrix{T})
cudss_set(matrix::CudssMatrix{T}, A::CuSparseMatrixCSR{T})
cudss_set(matrix::CudssMatrix{T}, A::CuSparseMatrixCSR{T,Cint})
cudss_set(data::CudssSolver, param::String, value)
cudss_set(config::CudssConfig, param::String, value)
cudss_set(data::CudssData, param::String, value)
Expand Down Expand Up @@ -80,7 +80,7 @@ function cudss_set(matrix::CudssMatrix{T}, A::CuMatrix{T}) where T <: BlasFloat
cudssMatrixSetValues(matrix, A)
end

function cudss_set(matrix::CudssMatrix{T}, A::CuSparseMatrixCSR{T}) where T <: BlasFloat
function cudss_set(matrix::CudssMatrix{T}, A::CuSparseMatrixCSR{T,Cint}) where T <: BlasFloat
cudssMatrixSetCsrPointers(matrix, A.rowPtr, CU_NULL, A.colVal, A.nzVal)
end

Expand Down
4 changes: 3 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using Test
using Test, Random
using CUDA, CUDA.CUSPARSE
using CUDSS
using SparseArrays
Expand All @@ -9,6 +9,8 @@ import CUDSS: CUDSS_ALG_DEFAULT, CUDSS_ALG_1, CUDSS_ALG_2, CUDSS_ALG_3

@info("CUDSS_INSTALLATION : $(CUDSS.CUDSS_INSTALLATION)")

Random.seed!(666) # Random tests are diabolical

include("test_cudss.jl")

@testset "CUDSS" begin
Expand Down
133 changes: 73 additions & 60 deletions test/test_cudss.jl
Original file line number Diff line number Diff line change
Expand Up @@ -113,79 +113,90 @@ function cudss_execution()
@testset "precision = $T" for T in (Float32, Float64, ComplexF32, ComplexF64)
R = real(T)
@testset "Unsymmetric -- Non-Hermitian" begin
A_cpu = sprand(T, n, n, 0.02) + I
x_cpu = zeros(T, n)
b_cpu = rand(T, n)
@testset "Pivoting = $pivot" for pivot in ('C', 'R', 'N')
A_cpu = sprand(T, n, n, 0.02) + I
x_cpu = zeros(T, n)
b_cpu = rand(T, n)

A_gpu = CuSparseMatrixCSR(A_cpu)
x_gpu = CuVector(x_cpu)
b_gpu = CuVector(b_cpu)
A_gpu = CuSparseMatrixCSR(A_cpu)
x_gpu = CuVector(x_cpu)
b_gpu = CuVector(b_cpu)

matrix = CudssMatrix(A_gpu, "G", 'F')
config = CudssConfig()
data = CudssData()
solver = CudssSolver(matrix, config, data)
matrix = CudssMatrix(A_gpu, "G", 'F')
config = CudssConfig()
data = CudssData()
solver = CudssSolver(matrix, config, data)
cudss_set(solver, "pivot_type", pivot)

cudss("analysis", solver, x_gpu, b_gpu)
cudss("factorization", solver, x_gpu, b_gpu)
cudss("solve", solver, x_gpu, b_gpu)
cudss("analysis", solver, x_gpu, b_gpu)
cudss("factorization", solver, x_gpu, b_gpu)
cudss("solve", solver, x_gpu, b_gpu)

r_gpu = b_gpu - A_gpu * x_gpu
@test norm(r_gpu) ≤ √eps(R)
r_gpu = b_gpu - A_gpu * x_gpu
@test norm(r_gpu) ≤ √eps(R)
end
end

@testset "view = $view" for view in ('L', 'U', 'F')
@testset "Symmetric -- Hermitian" begin
A_cpu = sprand(T, n, n, 0.01) + I
A_cpu = A_cpu + A_cpu'
X_cpu = zeros(T, n, p)
B_cpu = rand(T, n, p)
symmetric_hermitian_pivots = T <: Real ? ('C', 'R', 'N') : ('N',)
@testset "Symmetric -- Hermitian" begin
@testset "view = $view" for view in ('F',)
@testset "Pivoting = $pivot" for pivot in symmetric_hermitian_pivots
A_cpu = sprand(T, n, n, 0.01) + I
A_cpu = A_cpu + A_cpu'
X_cpu = zeros(T, n, p)
B_cpu = rand(T, n, p)

(view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril))
(view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu))
(view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu))
X_gpu = CuMatrix(X_cpu)
B_gpu = CuMatrix(B_cpu)
(view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril))
(view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu))
(view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu))
X_gpu = CuMatrix(X_cpu)
B_gpu = CuMatrix(B_cpu)

structure = T <: Real ? "S" : "H"
matrix = CudssMatrix(A_gpu, structure, view)
config = CudssConfig()
data = CudssData()
solver = CudssSolver(matrix, config, data)
(structure == "H") && cudss_set(solver, "pivot_type", 'N')
structure = T <: Real ? "S" : "H"
matrix = CudssMatrix(A_gpu, structure, view)
config = CudssConfig()
data = CudssData()
solver = CudssSolver(matrix, config, data)
cudss_set(solver, "pivot_type", pivot)

cudss("analysis", solver, X_gpu, B_gpu)
cudss("factorization", solver, X_gpu, B_gpu)
cudss("solve", solver, X_gpu, B_gpu)
cudss("analysis", solver, X_gpu, B_gpu)
cudss("factorization", solver, X_gpu, B_gpu)
cudss("solve", solver, X_gpu, B_gpu)

R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu
@test norm(R_gpu) ≤ √eps(R)
R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu
@test norm(R_gpu) ≤ √eps(R)
end
end
end

@testset "SPD -- HPD" begin
A_cpu = sprand(T, n, n, 0.01)
A_cpu = A_cpu * A_cpu' + I
X_cpu = zeros(T, n, p)
B_cpu = rand(T, n, p)
@testset "SPD -- HPD" begin
@testset "view = $view" for view in ('F',)
@testset "Pivoting = $pivot" for pivot in ('C', 'R', 'N')
A_cpu = sprand(T, n, n, 0.01)
A_cpu = A_cpu * A_cpu' + I
X_cpu = zeros(T, n, p)
B_cpu = rand(T, n, p)

(view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril))
(view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu))
(view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu))
X_gpu = CuMatrix(X_cpu)
B_gpu = CuMatrix(B_cpu)
(view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril))
(view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu))
(view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu))
X_gpu = CuMatrix(X_cpu)
B_gpu = CuMatrix(B_cpu)

structure = T <: Real ? "SPD" : "HPD"
matrix = CudssMatrix(A_gpu, structure, view)
config = CudssConfig()
data = CudssData()
solver = CudssSolver(matrix, config, data)
structure = T <: Real ? "SPD" : "HPD"
matrix = CudssMatrix(A_gpu, structure, view)
config = CudssConfig()
data = CudssData()
solver = CudssSolver(matrix, config, data)
cudss_set(solver, "pivot_type", pivot)

cudss("analysis", solver, X_gpu, B_gpu)
cudss("factorization", solver, X_gpu, B_gpu)
cudss("solve", solver, X_gpu, B_gpu)
cudss("analysis", solver, X_gpu, B_gpu)
cudss("factorization", solver, X_gpu, B_gpu)
cudss("solve", solver, X_gpu, B_gpu)

R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu
@test norm(R_gpu) ≤ √eps(R)
R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu
@test norm(R_gpu) ≤ √eps(R)
end
end
end
end
Expand Down Expand Up @@ -234,8 +245,8 @@ function cudss_generic()
end
end

@testset "view = $view" for view in ('F',)
@testset "Symmetric -- Hermitian" begin
@testset "Symmetric -- Hermitian" begin
@testset "view = $view" for view in ('F',)
A_cpu = sprand(T, n, n, 0.01) + I
A_cpu = A_cpu + A_cpu'
B_cpu = rand(T, n, p)
Expand Down Expand Up @@ -281,8 +292,10 @@ function cudss_generic()
@test norm(R_gpu2) ≤ √eps(R)
end
end
end

@testset "SPD -- HPD" begin
@testset "SPD -- HPD" begin
@testset "view = $view" for view in ('F',)
A_cpu = sprand(T, n, n, 0.01)
A_cpu = A_cpu * A_cpu' + I
B_cpu = rand(T, n, p)
Expand Down

0 comments on commit 159dd11

Please sign in to comment.