From 9d537a2e75f0acdd1fc3a73f6febbdd428e7628c Mon Sep 17 00:00:00 2001 From: Luke Morris Date: Thu, 16 May 2024 17:32:46 -0400 Subject: [PATCH 1/2] Factorize and solve the Laplacian on the GPU --- test/OperatorsCUDA.jl | 77 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 70 insertions(+), 7 deletions(-) diff --git a/test/OperatorsCUDA.jl b/test/OperatorsCUDA.jl index b09eff7e..ca110767 100644 --- a/test/OperatorsCUDA.jl +++ b/test/OperatorsCUDA.jl @@ -1,15 +1,18 @@ module TestOperatorsCUDA -using Test -using SparseArrays -using LinearAlgebra -using CombinatorialSpaces using CUDA +using CUDA.CUSPARSE using Catlab -using Random +using CombinatorialSpaces using GeometryBasics: Point2, Point3 -using StaticArrays: SVector using Krylov +using LinearAlgebra +using LinearOperators +using Random +using SparseArrays +using StaticArrays: SVector +using Statistics: mean +using Test Point2D = Point2{Float64} Point3D = Point3{Float64} @@ -148,4 +151,64 @@ end end end -end \ No newline at end of file +@testset "Krylov" begin + for sd in dual_meshes_2D + Δ0_cpu = Δ(0,sd) + Δ0 = CuSparseMatrixCSC(Δ0_cpu) + w = Δ0 * CuArray(map(x -> x[1]^3, point(sd))) + + v = CuArray(zeros(Float64, nv(sd))) + y = CuArray(zeros(Float64, nv(sd))) + z = CuArray(zeros(Float64, nv(sd))) + + # Factorizing on the GPU: + Δ0p = ilu02(Δ0) + op = LinearOperator(Float64, nv(sd), nv(sd), false, false, + (y,x) -> begin + ldiv!(z, LowerTriangular(Δ0p), x) + ldiv!(y, UnitUpperTriangular(Δ0p), z) + y + end) + + x, stats = bicgstab(Δ0, w, M=op, atol=1e-13) + resid = Δ0*x .- w + RMS = sqrt(mean(resid.^2)) + @test stats.solved + @test RMS < 1e-5 + + x, stats = gmres(Δ0, w, M=op, atol=1e-13) + RMS = sqrt(mean(resid.^2)) + @test stats.solved + @test RMS < 1e-5 + + # Factorizing on the CPU: + luΔ0_cpu = lu(Δ0_cpu) + L = CuSparseMatrixCSC(luΔ0_cpu.L) + U = CuSparseMatrixCSC(luΔ0_cpu.U) + p = CuArray(luΔ0_cpu.p) + q⁻¹ = CuArray(invperm(luΔ0_cpu.q)) + Rs = CuArray(luΔ0_cpu.Rs) + + # Solving on the GPU: + w_Rsp = (w .* Rs)[p] + ldiv!(z, LowerTriangular(L), w_Rsp) + ldiv!(v, UpperTriangular(U), z) + y .= v[q⁻¹] + + resid = Δ0*y .- w + RMS = sqrt(mean(resid.^2)) + @test RMS < 1e-8 + + # Solving on the CPU: + y_cpu = luΔ0_cpu \ Array(w) + resid = Δ0_cpu*(y_cpu) .- Array(w) + RMS = sqrt(mean(resid.^2)) + @test RMS < 1e-8 + + # Comparisons of the solutions: + resid_resid = Array(Δ0*y) .- Δ0_cpu*y_cpu + RMS = sqrt(mean(resid_resid.^2)) + end +end + +end From e8509fa5be6e37e3dee1d9ba20fd956a14600ef6 Mon Sep 17 00:00:00 2001 From: Luke Morris Date: Fri, 17 May 2024 16:39:16 -0400 Subject: [PATCH 2/2] Demonstrate using internal CUDA QR solve (fails if singular) --- test/OperatorsCUDA.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/OperatorsCUDA.jl b/test/OperatorsCUDA.jl index ca110767..11654601 100644 --- a/test/OperatorsCUDA.jl +++ b/test/OperatorsCUDA.jl @@ -2,6 +2,7 @@ module TestOperatorsCUDA using CUDA using CUDA.CUSPARSE +using CUDA.CUSOLVER using Catlab using CombinatorialSpaces using GeometryBasics: Point2, Point3 @@ -208,6 +209,12 @@ end # Comparisons of the solutions: resid_resid = Array(Δ0*y) .- Δ0_cpu*y_cpu RMS = sqrt(mean(resid_resid.^2)) + + # CUDA's native qr facorization-solve. + using CUDA.CUSOLVER: SparseQR, spqr_solve + Δ0_CSR = CuSparseMatrixCSR(Δ0_cpu) + Δ0_QR = SparseQR(Δ0_CSR) + spqr_solve(Δ0_QR, w, y) end end