Skip to content

Commit

Permalink
Add tests with the hybrid mode
Browse files Browse the repository at this point in the history
  • Loading branch information
amontoison committed Jul 8, 2024
1 parent 6147c63 commit 0bed8c6
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 1 deletion.
2 changes: 1 addition & 1 deletion docs/src/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ cudss_set(solver, "hybrid_mode", 1)
cudss("analysis", solver, x_gpu, b_gpu)

# Minimal amount of device memory required in the hybrid memory mode.
nbytes = cudss_get(solver, "hybrid_device_memory_min")
nbytes_gpu = cudss_get(solver, "hybrid_device_memory_min")

# Device memory limit for the hybrid memory mode.
# Only use it if you don't want to rely on the internal default heuristic.
Expand Down
4 changes: 4 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,8 @@ include("test_cudss.jl")
# Run `small_matrices()` inside its own named test set so its results are reported separately.
@testset "Small matrices" begin
    small_matrices()
end

# Run `hybrid_mode()` inside its own named test set so its results are reported separately.
@testset "Hybrid mode" begin
    hybrid_mode()
end
end
104 changes: 104 additions & 0 deletions test/test_cudss.jl
Original file line number Diff line number Diff line change
Expand Up @@ -707,3 +707,107 @@ function small_matrices()
end
end
end

# Test the solver with the "hybrid_mode" option enabled.
#
# For each element type, random sparse systems are solved with three solver
# configurations (the "LU", "LDLᵀ / LDLᴴ" and "LLᵀ / LLᴴ" test sets) and the
# 2-norm of the residual `b - A * x` is checked against `√eps(real(T))`.
#
# NOTE(review): `n` (the system size) is not defined inside this function; it is
# presumably provided by an enclosing/global scope of the test file — confirm.
function hybrid_mode()
    # Shared driver for the three wrappers below.
    #
    # Uploads the system to the GPU, enables "hybrid_mode", runs the analysis,
    # sets "hybrid_device_memory_limit" to the value reported by
    # "hybrid_device_memory_min", then factorizes and solves.
    #
    # `structure` and `uplo` are forwarded unchanged to `CudssSolver`.
    # Returns the norm of the residual computed with the full matrix `A_cpu`.
    function hybrid_solve(A_cpu, x_cpu, b_cpu, structure, uplo)
        A_full_gpu = CuSparseMatrixCSR(A_cpu)

        # Only the triangle named by `uplo` is handed to the solver; any other
        # value of `uplo` passes the whole matrix.
        A_gpu = if uplo == 'L'
            CuSparseMatrixCSR(tril(A_cpu))
        elseif uplo == 'U'
            CuSparseMatrixCSR(triu(A_cpu))
        else
            A_full_gpu
        end
        x_gpu = CuVector(x_cpu)
        b_gpu = CuVector(b_cpu)

        solver = CudssSolver(A_gpu, structure, uplo)
        # The option is set before the analysis phase, as in the original test.
        cudss_set(solver, "hybrid_mode", 1)

        cudss("analysis", solver, x_gpu, b_gpu)
        # Minimal amount of device memory required in the hybrid memory mode,
        # used here as the device memory limit.
        nbytes_gpu = cudss_get(solver, "hybrid_device_memory_min")
        cudss_set(solver, "hybrid_device_memory_limit", nbytes_gpu)

        cudss("factorization", solver, x_gpu, b_gpu)
        cudss("solve", solver, x_gpu, b_gpu)

        # The residual always uses the full matrix, even when only one
        # triangle was given to the solver.
        r_gpu = b_gpu - A_full_gpu * x_gpu
        return norm(r_gpu)
    end

    # Structure "G" with the full matrix ('F'); `T` is kept for a uniform signature.
    function hybrid_lu(T, A_cpu, x_cpu, b_cpu)
        return hybrid_solve(A_cpu, x_cpu, b_cpu, "G", 'F')
    end

    # Structure "S" for real element types and "H" for complex ones.
    function hybrid_ldlt(T, A_cpu, x_cpu, b_cpu, uplo)
        structure = T <: Real ? "S" : "H"
        return hybrid_solve(A_cpu, x_cpu, b_cpu, structure, uplo)
    end

    # Structure "SPD" for real element types and "HPD" for complex ones.
    function hybrid_llt(T, A_cpu, x_cpu, b_cpu, uplo)
        structure = T <: Real ? "SPD" : "HPD"
        return hybrid_solve(A_cpu, x_cpu, b_cpu, structure, uplo)
    end

    @testset "precision = $T" for T in (Float32, Float64, ComplexF32, ComplexF64)
        R = real(T)
        @testset "LU" begin
            A_cpu = sprand(T, n, n, 0.05) + I
            x_cpu = zeros(T, n)
            b_cpu = rand(T, n)
            res = hybrid_lu(T, A_cpu, x_cpu, b_cpu)
            # The comparison operator was missing (`@test res eps(R)` is not a
            # valid `@test` call); the residual is bounded by √eps(R).
            @test res ≤ √eps(R)
        end
        @testset "LDLᵀ / LDLᴴ" begin
            A_cpu = sprand(T, n, n, 0.05) + I
            # Adding the adjoint makes the matrix symmetric / Hermitian.
            A_cpu = A_cpu + A_cpu'
            x_cpu = zeros(T, n)
            b_cpu = rand(T, n)
            @testset "uplo = $uplo" for uplo in ('L', 'U', 'F')
                res = hybrid_ldlt(T, A_cpu, x_cpu, b_cpu, uplo)
                @test res ≤ √eps(R)
            end
        end
        @testset "LLᵀ / LLᴴ" begin
            A_cpu = sprand(T, n, n, 0.01)
            # A * A' + I is symmetric / Hermitian positive definite.
            A_cpu = A_cpu * A_cpu' + I
            x_cpu = zeros(T, n)
            b_cpu = rand(T, n)
            @testset "uplo = $uplo" for uplo in ('L', 'U', 'F')
                res = hybrid_llt(T, A_cpu, x_cpu, b_cpu, uplo)
                @test res ≤ √eps(R)
            end
        end
    end
end

0 comments on commit 0bed8c6

Please sign in to comment.