Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Factorize and solve the Laplacian on the GPU #84

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 77 additions & 7 deletions test/OperatorsCUDA.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
module TestOperatorsCUDA

using Test
using SparseArrays
using LinearAlgebra
using CombinatorialSpaces
using CUDA
using CUDA.CUSPARSE
using CUDA.CUSOLVER
using Catlab
using Random
using CombinatorialSpaces
using GeometryBasics: Point2, Point3
using StaticArrays: SVector
using Krylov
using LinearAlgebra
using LinearOperators
using Random
using SparseArrays
using StaticArrays: SVector
using Statistics: mean
using Test

Point2D = Point2{Float64}
Point3D = Point3{Float64}
Expand Down Expand Up @@ -148,4 +152,70 @@ end
end
end

end
@testset "Krylov" begin
for sd in dual_meshes_2D
Δ0_cpu = Δ(0,sd)
Δ0 = CuSparseMatrixCSC(Δ0_cpu)
w = Δ0 * CuArray(map(x -> x[1]^3, point(sd)))

v = CuArray(zeros(Float64, nv(sd)))
y = CuArray(zeros(Float64, nv(sd)))
z = CuArray(zeros(Float64, nv(sd)))

# Factorizing on the GPU:
Δ0p = ilu02(Δ0)
op = LinearOperator(Float64, nv(sd), nv(sd), false, false,
(y,x) -> begin
ldiv!(z, LowerTriangular(Δ0p), x)
ldiv!(y, UnitUpperTriangular(Δ0p), z)
y
end)

x, stats = bicgstab(Δ0, w, M=op, atol=1e-13)
resid = Δ0*x .- w
RMS = sqrt(mean(resid.^2))
@test stats.solved
@test RMS < 1e-5

x, stats = gmres(Δ0, w, M=op, atol=1e-13)
RMS = sqrt(mean(resid.^2))
@test stats.solved
@test RMS < 1e-5

# Factorizing on the CPU:
luΔ0_cpu = lu(Δ0_cpu)
L = CuSparseMatrixCSC(luΔ0_cpu.L)
U = CuSparseMatrixCSC(luΔ0_cpu.U)
p = CuArray(luΔ0_cpu.p)
q⁻¹ = CuArray(invperm(luΔ0_cpu.q))
Rs = CuArray(luΔ0_cpu.Rs)

# Solving on the GPU:
w_Rsp = (w .* Rs)[p]
ldiv!(z, LowerTriangular(L), w_Rsp)
ldiv!(v, UpperTriangular(U), z)
y .= v[q⁻¹]

resid = Δ0*y .- w
RMS = sqrt(mean(resid.^2))
@test RMS < 1e-8

# Solving on the CPU:
y_cpu = luΔ0_cpu \ Array(w)
resid = Δ0_cpu*(y_cpu) .- Array(w)
RMS = sqrt(mean(resid.^2))
@test RMS < 1e-8

# Comparisons of the solutions:
resid_resid = Array(Δ0*y) .- Δ0_cpu*y_cpu
RMS = sqrt(mean(resid_resid.^2))

# CUDA's native qr facorization-solve.
using CUDA.CUSOLVER: SparseQR, spqr_solve
Δ0_CSR = CuSparseMatrixCSR(Δ0_cpu)
Δ0_QR = SparseQR(Δ0_CSR)
spqr_solve(Δ0_QR, w, y)
end
end

end
Loading