From 0ffde203fae5d5d57fa08b105707dd9c3d499771 Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Sun, 24 Mar 2024 22:07:41 -0400 Subject: [PATCH 1/3] Update libcudss.jl --- src/libcudss.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libcudss.jl b/src/libcudss.jl index 639b53d..194c992 100644 --- a/src/libcudss.jl +++ b/src/libcudss.jl @@ -127,14 +127,14 @@ end @checked function cudssDataSet(handle, data, param, value, sizeInBytes) initialize_context() @ccall libcudss.cudssDataSet(handle::cudssHandle_t, data::cudssData_t, - param::cudssDataParam_t, value::Ptr{Cvoid}, + param::cudssDataParam_t, value::PtrOrCuPtr{Cvoid}, sizeInBytes::Cint)::cudssStatus_t end @checked function cudssDataGet(handle, data, param, value, sizeInBytes, sizeWritten) initialize_context() @ccall libcudss.cudssDataGet(handle::cudssHandle_t, data::cudssData_t, - param::cudssDataParam_t, value::Ptr{Cvoid}, + param::cudssDataParam_t, value::PtrOrCuPtr{Cvoid}, sizeInBytes::Cint, sizeWritten::Ptr{Cint})::cudssStatus_t end From 9ec5569a35c7f2868934233b6319ea304654155a Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Sun, 24 Mar 2024 23:53:42 -0400 Subject: [PATCH 2/3] Test cudss_set with the data parameter user_perm --- src/interfaces.jl | 7 ++- test/runtests.jl | 4 ++ test/test_cudss.jl | 103 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 4 deletions(-) diff --git a/src/interfaces.jl b/src/interfaces.jl index be3b863..0fb6925 100644 --- a/src/interfaces.jl +++ b/src/interfaces.jl @@ -97,10 +97,9 @@ end function cudss_set(data::CudssData, param::String, value) (param ∈ CUDSS_DATA_PARAMETERS) || throw(ArgumentError("Unknown data parameter $param.")) (param == "user_perm") || throw(ArgumentError("Only the data parameter \"user_perm\" can be set.")) - type = CUDSS_TYPES[param] - val = Ref{type}(value) - nbytes = sizeof(val) - cudssDataSet(handle(), data, param, val, nbytes) + (value isa Vector{Cint} || value isa CuVector{Cint}) || throw(ArgumentError("The permutation is neither a Vector{Cint} nor a CuVector{Cint}.")) + nbytes = sizeof(value) + cudssDataSet(handle(), data, param, value, nbytes) end function cudss_set(config::CudssConfig, param::String, value) diff --git a/test/runtests.jl b/test/runtests.jl index 2a3a653..72d5b89 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -34,4 +34,8 @@ include("test_cudss.jl") @testset "Generic API" begin cudss_generic() end + + @testset "User permutation" begin + user_permutation() + end end diff --git a/test/test_cudss.jl b/test/test_cudss.jl index 50ea65c..df37a24 100644 --- a/test/test_cudss.jl +++ b/test/test_cudss.jl @@ -344,3 +344,106 @@ function cudss_generic() end end end + +function user_permutation() + function permutation_lu(A_cpu, x_cpu, b_cpu, permutation) + A_gpu = CuSparseMatrixCSR(A_cpu) + x_gpu = CuVector(x_cpu) + b_gpu = CuVector(b_cpu) + + solver = CudssSolver(A_gpu, "G", 'F') + + cudss_set(solver, "user_perm", permutation) + + cudss("analysis", solver, x_gpu, b_gpu) + cudss("factorization", solver, x_gpu, b_gpu) + cudss("solve", solver, x_gpu, b_gpu) + + nz = cudss_get(solver, "lu_nnz") + return nz + end + + function permutation_ldlt(A_cpu, x_cpu, b_cpu, permutation) + A_gpu = CuSparseMatrixCSR(A_cpu |> tril) + x_gpu = CuVector(x_cpu) + b_gpu = CuVector(b_cpu) + + structure = T <: Real ? "S" : "H" + solver = CudssSolver(A_gpu, structure, 'L') + cudss_set(solver, "user_perm", permutation) + + cudss("analysis", solver, x_gpu, b_gpu) + cudss("factorization", solver, x_gpu, b_gpu) + cudss("solve", solver, x_gpu, b_gpu) + + nz = cudss_get(solver, "lu_nnz") + return nz + end + + function permutation_llt(A_cpu, x_cpu, b_cpu, permutation) + A_gpu = CuSparseMatrixCSR(A_cpu |> triu) + x_gpu = CuVector(x_cpu) + b_gpu = CuVector(b_cpu) + + structure = T <: Real ? "SPD" : "HPD" + solver = CudssSolver(A_gpu, structure, 'U') + cudss_set(solver, "user_perm", permutation) + + cudss("analysis", solver, x_gpu, b_gpu) + cudss("factorization", solver, x_gpu, b_gpu) + cudss("solve", solver, x_gpu, b_gpu) + + nz = cudss_get(solver, "lu_nnz") + return nz + end + + n = 1000 + perm1_cpu = Vector{Cint}(undef, n) + perm2_cpu = Vector{Cint}(undef, n) + for i = 1:n + perm1_cpu[i] = i + perm2_cpu[i] = n-i+1 + end + perm1_gpu = CuVector{Cint}(perm1_cpu) + perm2_gpu = CuVector{Cint}(perm2_cpu) + @testset "precision = $T" for T in (Float32, Float64, ComplexF32, ComplexF64) + @testset "LU" begin + A_cpu = sprand(T, n, n, 0.05) + I + x_cpu = zeros(T, n) + b_cpu = rand(T, n) + nz1_cpu = permutation_lu(A_cpu, x_cpu, b_cpu, perm1_cpu) + nz2_cpu = permutation_lu(A_cpu, x_cpu, b_cpu, perm2_cpu) + nz1_gpu = permutation_lu(A_cpu, x_cpu, b_cpu, perm1_gpu) + nz2_gpu = permutation_lu(A_cpu, x_cpu, b_cpu, perm2_gpu) + @test nz1_cpu == nz1_gpu + @test nz2_cpu == nz2_gpu + @test nz1_cpu != nz2_cpu + end + @testset "LDLᵀ / LDLᴴ" begin + A_cpu = sprand(T, n, n, 0.05) + I + A_cpu = A_cpu + A_cpu' + x_cpu = zeros(T, n) + b_cpu = rand(T, n) + nz1_cpu = permutation_ldlt(A_cpu, x_cpu, b_cpu, perm1_cpu) + nz2_cpu = permutation_ldlt(A_cpu, x_cpu, b_cpu, perm2_cpu) + nz1_gpu = permutation_ldlt(A_cpu, x_cpu, b_cpu, perm1_gpu) + nz2_gpu = permutation_ldlt(A_cpu, x_cpu, b_cpu, perm2_gpu) + @test nz1_cpu == nz1_gpu + @test nz2_cpu == nz2_gpu + @test nz1_cpu != nz2_cpu + end + @testset "LLᵀ / LLᴴ" begin + A_cpu = sprand(T, n, n, 0.01) + A_cpu = A_cpu * A_cpu' + I + x_cpu = zeros(T, n) + b_cpu = rand(T, n) + nz1_cpu = permutation_llt(A_cpu, x_cpu, b_cpu, perm1_cpu) + nz2_cpu = permutation_llt(A_cpu, x_cpu, b_cpu, perm2_cpu) + nz1_gpu = permutation_llt(A_cpu, x_cpu, b_cpu, perm1_gpu) + nz2_gpu = permutation_llt(A_cpu, x_cpu, b_cpu, perm2_gpu) + @test nz1_cpu == nz1_gpu + @test nz2_cpu == nz2_gpu + @test nz1_cpu != nz2_cpu + end + end +end From e3bd4dee72aef70662a298664b5bc229a76d60bd Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Mon, 25 Mar 2024 00:04:17 -0400 Subject: [PATCH 3/3] Update the test user_permutation --- test/test_cudss.jl | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/test/test_cudss.jl b/test/test_cudss.jl index df37a24..56a7f11 100644 --- a/test/test_cudss.jl +++ b/test/test_cudss.jl @@ -346,7 +346,7 @@ function cudss_generic() end function user_permutation() - function permutation_lu(A_cpu, x_cpu, b_cpu, permutation) + function permutation_lu(T, A_cpu, x_cpu, b_cpu, permutation) A_gpu = CuSparseMatrixCSR(A_cpu) x_gpu = CuVector(x_cpu) b_gpu = CuVector(b_cpu) @@ -363,7 +363,7 @@ function user_permutation() return nz end - function permutation_ldlt(A_cpu, x_cpu, b_cpu, permutation) + function permutation_ldlt(T, A_cpu, x_cpu, b_cpu, permutation) A_gpu = CuSparseMatrixCSR(A_cpu |> tril) x_gpu = CuVector(x_cpu) b_gpu = CuVector(b_cpu) @@ -380,7 +380,7 @@ function user_permutation() return nz end - function permutation_llt(A_cpu, x_cpu, b_cpu, permutation) + function permutation_llt(T, A_cpu, x_cpu, b_cpu, permutation) A_gpu = CuSparseMatrixCSR(A_cpu |> triu) x_gpu = CuVector(x_cpu) b_gpu = CuVector(b_cpu) @@ -411,10 +411,10 @@ function user_permutation() A_cpu = sprand(T, n, n, 0.05) + I x_cpu = zeros(T, n) b_cpu = rand(T, n) - nz1_cpu = permutation_lu(A_cpu, x_cpu, b_cpu, perm1_cpu) - nz2_cpu = permutation_lu(A_cpu, x_cpu, b_cpu, perm2_cpu) - nz1_gpu = permutation_lu(A_cpu, x_cpu, b_cpu, perm1_gpu) - nz2_gpu = permutation_lu(A_cpu, x_cpu, b_cpu, perm2_gpu) + nz1_cpu = permutation_lu(T, A_cpu, x_cpu, b_cpu, perm1_cpu) + nz2_cpu = permutation_lu(T, A_cpu, x_cpu, b_cpu, perm2_cpu) + nz1_gpu = permutation_lu(T, A_cpu, x_cpu, b_cpu, perm1_gpu) + nz2_gpu = permutation_lu(T, A_cpu, x_cpu, b_cpu, perm2_gpu) @test nz1_cpu == nz1_gpu @test nz2_cpu == nz2_gpu @test nz1_cpu != nz2_cpu @@ -424,10 +424,10 @@ function user_permutation() A_cpu = A_cpu + A_cpu' x_cpu = zeros(T, n) b_cpu = rand(T, n) - nz1_cpu = permutation_ldlt(A_cpu, x_cpu, b_cpu, perm1_cpu) - nz2_cpu = permutation_ldlt(A_cpu, x_cpu, b_cpu, perm2_cpu) - nz1_gpu = permutation_ldlt(A_cpu, x_cpu, b_cpu, perm1_gpu) - nz2_gpu = permutation_ldlt(A_cpu, x_cpu, b_cpu, perm2_gpu) + nz1_cpu = permutation_ldlt(T, A_cpu, x_cpu, b_cpu, perm1_cpu) + nz2_cpu = permutation_ldlt(T, A_cpu, x_cpu, b_cpu, perm2_cpu) + nz1_gpu = permutation_ldlt(T, A_cpu, x_cpu, b_cpu, perm1_gpu) + nz2_gpu = permutation_ldlt(T, A_cpu, x_cpu, b_cpu, perm2_gpu) @test nz1_cpu == nz1_gpu @test nz2_cpu == nz2_gpu @test nz1_cpu != nz2_cpu @@ -437,10 +437,10 @@ function user_permutation() A_cpu = A_cpu * A_cpu' + I x_cpu = zeros(T, n) b_cpu = rand(T, n) - nz1_cpu = permutation_llt(A_cpu, x_cpu, b_cpu, perm1_cpu) - nz2_cpu = permutation_llt(A_cpu, x_cpu, b_cpu, perm2_cpu) - nz1_gpu = permutation_llt(A_cpu, x_cpu, b_cpu, perm1_gpu) - nz2_gpu = permutation_llt(A_cpu, x_cpu, b_cpu, perm2_gpu) + nz1_cpu = permutation_llt(T, A_cpu, x_cpu, b_cpu, perm1_cpu) + nz2_cpu = permutation_llt(T, A_cpu, x_cpu, b_cpu, perm2_cpu) + nz1_gpu = permutation_llt(T, A_cpu, x_cpu, b_cpu, perm1_gpu) + nz2_gpu = permutation_llt(T, A_cpu, x_cpu, b_cpu, perm2_gpu) @test nz1_cpu == nz1_gpu @test nz2_cpu == nz2_gpu @test nz1_cpu != nz2_cpu