exanauts · amontoison · Jul 5, 2024 · Jul 5, 2024 · Jul 5, 2024
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -46,4 +46,8 @@ include("test_cudss.jl")
   @testset "Iterative refinement" begin
     iterative_refinement()
   end
+
+  @testset "Small matrices" begin
+    small_matrices()
+  end
 end
diff --git a/test/test_cudss.jl b/test/test_cudss.jl
@@ -512,13 +512,19 @@ function iterative_refinement()
     return norm(r_gpu)
   end
 
-  function ir_ldlt(T, A_cpu, x_cpu, b_cpu, ir)
-    A_gpu = CuSparseMatrixCSR(A_cpu |> tril)
+  function ir_ldlt(T, A_cpu, x_cpu, b_cpu, ir, uplo)
+    if uplo == 'L'
+      A_gpu = CuSparseMatrixCSR(A_cpu |> tril)
+    elseif uplo == 'U'
+      A_gpu = CuSparseMatrixCSR(A_cpu |> triu)
+    else
+      A_gpu = CuSparseMatrixCSR(A_cpu)
+    end
     x_gpu = CuVector(x_cpu)
     b_gpu = CuVector(b_cpu)
 
     structure = T <: Real ? "S" : "H"
-    solver = CudssSolver(A_gpu, structure, 'L')
+    solver = CudssSolver(A_gpu, structure, uplo)
     cudss_set(solver, "ir_n_steps", ir)
 
     cudss("analysis", solver, x_gpu, b_gpu)
@@ -529,13 +535,19 @@ function iterative_refinement()
     return norm(r_gpu)
   end
 
-  function ir_llt(T, A_cpu, x_cpu, b_cpu, ir)
-    A_gpu = CuSparseMatrixCSR(A_cpu |> triu)
+  function ir_llt(T, A_cpu, x_cpu, b_cpu, ir, uplo)
+    if uplo == 'L'
+      A_gpu = CuSparseMatrixCSR(A_cpu |> tril)
+    elseif uplo == 'U'
+      A_gpu = CuSparseMatrixCSR(A_cpu |> triu)
+    else
+      A_gpu = CuSparseMatrixCSR(A_cpu)
+    end
     x_gpu = CuVector(x_cpu)
     b_gpu = CuVector(b_cpu)
 
     structure = T <: Real ? "SPD" : "HPD"
-    solver = CudssSolver(A_gpu, structure, 'U')
+    solver = CudssSolver(A_gpu, structure, uplo)
     cudss_set(solver, "ir_n_steps", ir)
 
     cudss("analysis", solver, x_gpu, b_gpu)
@@ -562,17 +574,115 @@ function iterative_refinement()
         A_cpu = A_cpu + A_cpu'
         x_cpu = zeros(T, n)
         b_cpu = rand(T, n)
-        res = ir_ldlt(T, A_cpu, x_cpu, b_cpu, ir)
-        @test res ≤ √eps(R)
+        @testset "uplo = $uplo" for uplo in ('L', 'U', 'F')
+          res = ir_ldlt(T, A_cpu, x_cpu, b_cpu, ir, uplo)
+          @test res ≤ √eps(R)
+        end
       end
       @testset "LLᵀ / LLᴴ" begin
         A_cpu = sprand(T, n, n, 0.01)
         A_cpu = A_cpu * A_cpu' + I
         x_cpu = zeros(T, n)
         b_cpu = rand(T, n)
-        res = ir_llt(T, A_cpu, x_cpu, b_cpu, ir)
+        @testset "uplo = $uplo" for uplo in ('L', 'U', 'F')
+          res = ir_llt(T, A_cpu, x_cpu, b_cpu, ir, uplo)
+          @test res ≤ √eps(R)
+        end
+      end
+    end
+  end
+end
+
+function small_matrices()
+  function cudss_lu(T, A_cpu, x_cpu, b_cpu)
+    # Upload the system to the GPU; `T` is accepted but not used in this helper.
+    mat = CuSparseMatrixCSR(A_cpu)
+    sol, rhs = CuVector(x_cpu), CuVector(b_cpu)
+    lu_solver = CudssSolver(mat, "G", 'F')
+
+    # Run the cuDSS phases in order on the same solver and vectors.
+    for phase in ("analysis", "factorization", "solve")
+      cudss(phase, lu_solver, sol, rhs)
+    end
+
+    # Norm of b - A*x (presumably `sol` holds the computed solution after "solve").
+    return norm(rhs - mat * sol)
+  end
+
+  function cudss_ldlt(T, A_cpu, x_cpu, b_cpu, uplo)
+    # Keep only the triangle named by `uplo`; any other value keeps the full matrix.
+    stored = uplo == 'L' ? tril(A_cpu) : (uplo == 'U' ? triu(A_cpu) : A_cpu)
+    mat = CuSparseMatrixCSR(stored)
+    sol, rhs = CuVector(x_cpu), CuVector(b_cpu)
+
+    # Structure string: "S" when T <: Real, "H" for every other element type.
+    kind = "H"
+    if T <: Real
+      kind = "S"
+    end
+    ldlt_solver = CudssSolver(mat, kind, uplo)
+
+    # Run the cuDSS phases in order on the same solver and vectors.
+    for phase in ("analysis", "factorization", "solve")
+      cudss(phase, ldlt_solver, sol, rhs)
+    end
+
+    # The residual is formed with the full matrix, not the stored triangle.
+    return norm(rhs - CuSparseMatrixCSR(A_cpu) * sol)
+  end
+
+  function cudss_llt(T, A_cpu, x_cpu, b_cpu, uplo)
+    # Keep only the triangle named by `uplo`; any other value keeps the full matrix.
+    stored = uplo == 'L' ? tril(A_cpu) : (uplo == 'U' ? triu(A_cpu) : A_cpu)
+    mat = CuSparseMatrixCSR(stored)
+    sol, rhs = CuVector(x_cpu), CuVector(b_cpu)
+
+    # Structure string: "SPD" when T <: Real, "HPD" for every other element type.
+    kind = "HPD"
+    if T <: Real
+      kind = "SPD"
+    end
+    llt_solver = CudssSolver(mat, kind, uplo)
+
+    # Run the cuDSS phases in order on the same solver and vectors.
+    for phase in ("analysis", "factorization", "solve")
+      cudss(phase, llt_solver, sol, rhs)
+    end
+
+    # The residual is formed with the full matrix, not the stored triangle.
+    return norm(rhs - CuSparseMatrixCSR(A_cpu) * sol)
+  end
+
+  @testset "precision = $T" for T in (Float32, Float64, ComplexF32, ComplexF64)  # one nested testset per element type
+    R = real(T)  # real type associated with T; sets the tolerance √eps(R) used below
+    @testset "Size of the linear system: $n" for n in 1:16  # system sizes 1 through 16
+      @testset "LU" begin
+        A_cpu = sprand(T, n, n, 0.05) + I  # random sparse matrix plus the identity
+        x_cpu = zeros(T, n)
+        b_cpu = rand(T, n)
+        res = cudss_lu(T, A_cpu, x_cpu, b_cpu)  # residual norm returned by the helper
         @test res ≤ √eps(R)
       end
+      @testset "LDLᵀ / LDLᴴ" begin
+        A_cpu = sprand(T, n, n, 0.05) + I
+        A_cpu = A_cpu + A_cpu'  # A + A' is symmetric for real T and Hermitian for complex T
+        x_cpu = zeros(T, n)
+        b_cpu = rand(T, n)
+        @testset "uplo = $uplo" for uplo in ('L', 'U', 'F')  # same system for each uplo value
+          res = cudss_ldlt(T, A_cpu, x_cpu, b_cpu, uplo)
+          @test res ≤ √eps(R)
+        end
+      end
+      @testset "LLᵀ / LLᴴ" begin
+        A_cpu = sprand(T, n, n, 0.01)
+        A_cpu = A_cpu * A_cpu' + I  # A*A' + I is Hermitian positive definite
+        x_cpu = zeros(T, n)
+        b_cpu = rand(T, n)
+        @testset "uplo = $uplo" for uplo in ('L', 'U', 'F')  # same system for each uplo value
+          res = cudss_llt(T, A_cpu, x_cpu, b_cpu, uplo)
+          @test res ≤ √eps(R)
+        end
+      end
     end
   end
 end