[documentation] Iterative refinement and user permutation

exanauts · May 29, 2024 · 3a091bc · 3a091bc
1 parent 6e7eb01
commit 3a091bc
Show file tree

Hide file tree

Showing 3 changed files with 70 additions and 3 deletions.
diff --git a/docs/Project.toml b/docs/Project.toml
@@ -1,7 +1,8 @@
 [deps]
-Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
-CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e"
+AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e"
+Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 
 [compat]

diff --git a/docs/make.jl b/docs/make.jl
@@ -11,7 +11,8 @@ makedocs(
                            collapselevel = 1),
   sitename = "CUDSS.jl",
   pages = ["Home"        => "index.md",
-           "Generic API" => "generic.md"]
+           "Generic API" => "generic.md",
+           "Options"     => "options.md"]
 )
 
 deploydocs(

diff --git a/docs/src/options.md b/docs/src/options.md
@@ -0,0 +1,65 @@
+## Iterative refinement
+
+```julia
+using CUDA, CUDA.CUSPARSE
+using CUDSS
+using LinearAlgebra
+using SparseArrays
+
+T = Float64
+n = 100  # order of the square system matrix
+p = 5  # number of right-hand sides (columns of B_cpu)
+A_cpu = sprand(T, n, n, 0.01)  # random sparse n-by-n matrix; 0.01 is the nonzero probability per entry
+A_cpu = A_cpu + I  # add the identity to the random sparse matrix
+B_cpu = rand(T, n, p)
+
+A_gpu = CuSparseMatrixCSR(A_cpu)  # convert the host data to CUDA.jl array types (sparse CSR and dense)
+B_gpu = CuMatrix(B_cpu)
+X_gpu = similar(B_gpu)  # uninitialized array with the same type and size as B_gpu
+
+solver = CudssSolver(A_gpu, "G", 'F')  # presumably "G" = general structure, 'F' = full view -- confirm in CUDSS.jl docs
+
+# Perform one step of iterative refinement
+ir = 1
+cudss_set(solver, "ir_n_steps", ir)  # the option is set before any cudss phase is called
+
+cudss("analysis", solver, X_gpu, B_gpu)  # phases are invoked in the order analysis, factorization, solve
+cudss("factorization", solver, X_gpu, B_gpu)
+cudss("solve", solver, X_gpu, B_gpu)  # presumably writes the solution into X_gpu -- confirm
+
+R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu  # residual B - A*X; NOTE(review): A_gpu already wraps the same matrix and could presumably be reused -- confirm
+norm(R_gpu)  # norm of the residual as a quick accuracy check
+```
+
+## User permutation
+
+```julia
+using CUDA, CUDA.CUSPARSE
+using CUDSS
+using LinearAlgebra
+using SparseArrays
+using AMD
+
+T = ComplexF64
+n = 100  # order of the square system matrix
+A_cpu = sprand(T, n, n, 0.01)  # random sparse n-by-n matrix; 0.01 is the nonzero probability per entry
+A_cpu = A_cpu' * A_cpu + I  # A'A + I is Hermitian positive definite by construction
+b_cpu = rand(T, n)
+
+A_gpu = CuSparseMatrixCSR(A_cpu)  # convert the host data to CUDA.jl array types (sparse CSR and dense)
+b_gpu = CuVector(b_cpu)
+x_gpu = similar(b_gpu)  # uninitialized array with the same type and size as b_gpu
+
+solver = CudssSolver(A_gpu, "HPD", 'F')  # presumably "HPD" = Hermitian positive definite, 'F' = full view -- confirm in CUDSS.jl docs
+
+# Provide a user permutation
+permutation = amd(A_cpu) |> Vector{Cint}  # ordering computed by AMD.jl on the host matrix, converted to a Vector{Cint}
+cudss_set(solver, "user_perm", permutation)  # the option is set before the analysis phase is called
+
+cudss("analysis", solver, x_gpu, b_gpu)  # phases are invoked in the order analysis, factorization, solve
+cudss("factorization", solver, x_gpu, b_gpu)
+cudss("solve", solver, x_gpu, b_gpu)  # presumably writes the solution into x_gpu -- confirm
+
+r_gpu = b_gpu - CuSparseMatrixCSR(A_cpu) * x_gpu  # residual b - A*x; NOTE(review): A_gpu already wraps the same matrix and could presumably be reused -- confirm
+norm(r_gpu)  # norm of the residual as a quick accuracy check
+```