diff --git a/Project.toml b/Project.toml
index f536d45..4f3e5ec 100644
--- a/Project.toml
+++ b/Project.toml
@@ -17,10 +17,12 @@ CUDSS_jll = "0.1.0"
 julia = "1.6"
 LinearAlgebra = "1.6"
 SparseArrays = "1.6"
+Random = "1.6"
 Test = "1.6"
 
 [extras]
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test"]
+test = ["Random", "Test"]
diff --git a/docs/src/generic.md b/docs/src/generic.md
index b9690e6..7cabfc9 100644
--- a/docs/src/generic.md
+++ b/docs/src/generic.md
@@ -21,6 +21,18 @@ x_gpu = F \ b_gpu
 
 r_gpu = b_gpu - A_gpu * x_gpu
 norm(r_gpu)
+
+# In-place LU
+d_gpu = rand(T, n) |> CuVector
+B_gpu = A_gpu + Diagonal(d_gpu)
+lu!(F, B_gpu)
+
+c_cpu = rand(T, n)
+c_gpu = CuVector(c_cpu)
+ldiv!(x_gpu, F, c_gpu)
+
+r_gpu = c_gpu - B_gpu * x_gpu
+norm(r_gpu)
 ```
 
 ## Example 2: Sparse symmetric linear system with multiple right-hand sides
@@ -32,6 +44,7 @@ using LinearAlgebra
 using SparseArrays
 
 T = Float64
+R = real(T)
 n = 100
 p = 5
 A_cpu = sprand(T, n, n, 0.05) + I
@@ -43,14 +56,26 @@ B_gpu = CuMatrix(B_cpu)
 X_gpu = similar(B_gpu)
 
 F = ldlt(A_gpu, view='L')
-ldiv!(X_gpu, F, B_gpu)
+X_gpu = F \ B_gpu
 
 R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu
 norm(R_gpu)
+
+# In-place LDLᵀ
+d_gpu = rand(R, n) |> CuVector
+B_gpu = A_gpu + Diagonal(d_gpu)
+ldlt!(F, B_gpu)
+
+C_cpu = rand(T, n, p)
+C_gpu = CuMatrix(C_cpu)
+ldiv!(X_gpu, F, C_gpu)
+
+R_gpu = C_gpu - ( CuSparseMatrixCSR(A_cpu) + Diagonal(d_gpu) ) * X_gpu
+norm(R_gpu)
 ```
 
 !!! note
-    If we only store one triangle of `A_gpu`, we can also use the wrappers `Symmetric` and `Hermitian`. For real matrices, both wrappers are allowed but only `Hermitian` can be used for complex matrices.
+    If we only store one triangle of `A_gpu`, we can also use the wrappers `Symmetric` and `Hermitian` instead of using the keyword argument `view` in `ldlt`. For real matrices, both wrappers are allowed but only `Hermitian` can be used for complex matrices.
 
 ```julia
 S_gpu = Symmetric(A_gpu, :L)
@@ -66,6 +91,7 @@ using LinearAlgebra
 using SparseArrays
 
 T = ComplexF64
+R = real(T)
 n = 100
 p = 5
 A_cpu = sprand(T, n, n, 0.01)
@@ -77,14 +103,26 @@ B_gpu = CuMatrix(B_cpu)
 X_gpu = similar(B_gpu)
 
 F = cholesky(A_gpu, view='U')
-ldiv!(X_gpu, F, B_gpu)
+X_gpu = F \ B_gpu
 
 R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu
 norm(R_gpu)
+
+# In-place LLᴴ
+d_gpu = rand(R, n) |> CuVector
+B_gpu = A_gpu + Diagonal(d_gpu)
+cholesky!(F, B_gpu)
+
+C_cpu = rand(T, n, p)
+C_gpu = CuMatrix(C_cpu)
+ldiv!(X_gpu, F, C_gpu)
+
+R_gpu = C_gpu - ( CuSparseMatrixCSR(A_cpu) + Diagonal(d_gpu) ) * X_gpu
+norm(R_gpu)
 ```
 
 !!! note
-    If we only store one triangle of `A_gpu`, we can also use the wrappers `Symmetric` and `Hermitian`. For real matrices, both wrappers are allowed but only `Hermitian` can be used for complex matrices.
+    If we only store one triangle of `A_gpu`, we can also use the wrappers `Symmetric` and `Hermitian` instead of using the keyword argument `view` in `cholesky`. For real matrices, both wrappers are allowed but only `Hermitian` can be used for complex matrices.
 
 ```julia
 H_gpu = Hermitian(A_gpu, :U)
diff --git a/src/generic.jl b/src/generic.jl
index 8b11455..0dc5386 100644
--- a/src/generic.jl
+++ b/src/generic.jl
@@ -1,4 +1,4 @@
-function LinearAlgebra.lu(A::CuSparseMatrixCSR{T}) where T <: BlasFloat
+function LinearAlgebra.lu(A::CuSparseMatrixCSR{T,Cint}) where T <: BlasFloat
   n = LinearAlgebra.checksquare(A)
   solver = CudssSolver(A, "G", 'F')
   x = CudssMatrix(T, n)
@@ -8,7 +8,7 @@ function LinearAlgebra.lu(A::CuSparseMatrixCSR{T}) where T <: BlasFloat
   return solver
 end
 
-function LinearAlgebra.ldlt(A::CuSparseMatrixCSR{T}; view::Char='F') where T <: BlasFloat
+function LinearAlgebra.ldlt(A::CuSparseMatrixCSR{T,Cint}; view::Char='F') where T <: BlasFloat
   n = LinearAlgebra.checksquare(A)
   structure = T <: Real ? "S" : "H"
   solver = CudssSolver(A, structure, view)
@@ -20,10 +20,10 @@ function LinearAlgebra.ldlt(A::CuSparseMatrixCSR{T}; view::Char='F') where T <:
   return solver
 end
 
-LinearAlgebra.ldlt(A::Symmetric{T,<:CuSparseMatrixCSR{T}}) where T <: BlasReal = LinearAlgebra.ldlt(A.data, view=A.uplo)
-LinearAlgebra.ldlt(A::Hermitian{T,<:CuSparseMatrixCSR{T}}) where T <: BlasFloat = LinearAlgebra.ldlt(A.data, view=A.uplo)
+LinearAlgebra.ldlt(A::Symmetric{T,<:CuSparseMatrixCSR{T,Cint}}) where T <: BlasReal = LinearAlgebra.ldlt(A.data, view=A.uplo)
+LinearAlgebra.ldlt(A::Hermitian{T,<:CuSparseMatrixCSR{T,Cint}}) where T <: BlasFloat = LinearAlgebra.ldlt(A.data, view=A.uplo)
 
-function LinearAlgebra.cholesky(A::CuSparseMatrixCSR{T}; view::Char='F') where T <: BlasFloat
+function LinearAlgebra.cholesky(A::CuSparseMatrixCSR{T,Cint}; view::Char='F') where T <: BlasFloat
   n = LinearAlgebra.checksquare(A)
   structure = T <: Real ? "SPD" : "HPD"
   solver = CudssSolver(A, structure, view)
@@ -34,12 +34,12 @@ function LinearAlgebra.cholesky(A::CuSparseMatrixCSR{T}; view::Char='F') where T
   return solver
 end
 
-LinearAlgebra.cholesky(A::Symmetric{T,<:CuSparseMatrixCSR{T}}) where T <: BlasReal = LinearAlgebra.cholesky(A.data, view=A.uplo)
-LinearAlgebra.cholesky(A::Hermitian{T,<:CuSparseMatrixCSR{T}}) where T <: BlasFloat = LinearAlgebra.cholesky(A.data, view=A.uplo)
+LinearAlgebra.cholesky(A::Symmetric{T,<:CuSparseMatrixCSR{T,Cint}}) where T <: BlasReal = LinearAlgebra.cholesky(A.data, view=A.uplo)
+LinearAlgebra.cholesky(A::Hermitian{T,<:CuSparseMatrixCSR{T,Cint}}) where T <: BlasFloat = LinearAlgebra.cholesky(A.data, view=A.uplo)
 
 for fun in (:lu!, :ldlt!, :cholesky!)
   @eval begin
-    function LinearAlgebra.$fun(solver::CudssSolver{T}, A::CuSparseMatrixCSR{T}) where T <: BlasFloat
+    function LinearAlgebra.$fun(solver::CudssSolver{T}, A::CuSparseMatrixCSR{T,Cint}) where T <: BlasFloat
       n = LinearAlgebra.checksquare(A)
       cudss_set(solver.matrix, A)
       x = CudssMatrix(T, n)
diff --git a/src/helpers.jl b/src/helpers.jl
index a44cd7d..142c49c 100644
--- a/src/helpers.jl
+++ b/src/helpers.jl
@@ -6,7 +6,7 @@ export CudssMatrix, CudssData, CudssConfig
 """
     matrix = CudssMatrix(v::CuVector{T})
     matrix = CudssMatrix(A::CuMatrix{T})
-    matrix = CudssMatrix(A::CuSparseMatrixCSR{T}, struture::String, view::Char; index::Char='O')
+    matrix = CudssMatrix(A::CuSparseMatrixCSR{T,Cint}, struture::String, view::Char; index::Char='O')
 
 The type `T` can be `Float32`, `Float64`, `ComplexF32` or `ComplexF64`.
 
@@ -75,7 +75,7 @@ mutable struct CudssMatrix{T}
         obj
     end
 
-    function CudssMatrix(A::CuSparseMatrixCSR{T}, structure::String, view::Char; index::Char='O') where T <: BlasFloat
+    function CudssMatrix(A::CuSparseMatrixCSR{T,Cint}, structure::String, view::Char; index::Char='O') where T <: BlasFloat
         m,n = size(A)
         matrix_ref = Ref{cudssMatrix_t}()
         cudssMatrixCreateCsr(matrix_ref, m, n, nnz(A), A.rowPtr, CU_NULL,
diff --git a/src/interfaces.jl b/src/interfaces.jl
index 159823d..2616e3c 100644
--- a/src/interfaces.jl
+++ b/src/interfaces.jl
@@ -1,7 +1,7 @@
 export CudssSolver, cudss, cudss_set, cudss_get
 
 """
-    solver = CudssSolver(A::CuSparseMatrixCSR{T}, structure::String, view::Char; index::Char='O')
+    solver = CudssSolver(A::CuSparseMatrixCSR{T,Cint}, structure::String, view::Char; index::Char='O')
     solver = CudssSolver(matrix::CudssMatrix{T}, config::CudssConfig, data::CudssData)
 
 The type `T` can be `Float32`, `Float64`, `ComplexF32` or `ComplexF64`.
@@ -36,7 +36,7 @@ mutable struct CudssSolver{T}
     return new{T}(matrix, config, data)
   end
 
-  function CudssSolver(A::CuSparseMatrixCSR{T}, structure::String, view::Char; index::Char='O') where T <: BlasFloat
+  function CudssSolver(A::CuSparseMatrixCSR{T,Cint}, structure::String, view::Char; index::Char='O') where T <: BlasFloat
     matrix = CudssMatrix(A, structure, view; index)
     config = CudssConfig()
     data = CudssData()
@@ -47,7 +47,7 @@ end
 """
     cudss_set(matrix::CudssMatrix{T}, v::CuVector{T})
     cudss_set(matrix::CudssMatrix{T}, A::CuMatrix{T})
-    cudss_set(matrix::CudssMatrix{T}, A::CuSparseMatrixCSR{T})
+    cudss_set(matrix::CudssMatrix{T}, A::CuSparseMatrixCSR{T,Cint})
     cudss_set(data::CudssSolver, param::String, value)
     cudss_set(config::CudssConfig, param::String, value)
     cudss_set(data::CudssData, param::String, value)
@@ -80,7 +80,7 @@ function cudss_set(matrix::CudssMatrix{T}, A::CuMatrix{T}) where T <: BlasFloat
   cudssMatrixSetValues(matrix, A)
 end
 
-function cudss_set(matrix::CudssMatrix{T}, A::CuSparseMatrixCSR{T}) where T <: BlasFloat
+function cudss_set(matrix::CudssMatrix{T}, A::CuSparseMatrixCSR{T,Cint}) where T <: BlasFloat
   cudssMatrixSetCsrPointers(matrix, A.rowPtr, CU_NULL, A.colVal, A.nzVal)
 end
 
diff --git a/test/runtests.jl b/test/runtests.jl
index ff9717f..2a3a653 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,4 +1,4 @@
-using Test
+using Test, Random
 using CUDA, CUDA.CUSPARSE
 using CUDSS
 using SparseArrays
@@ -9,6 +9,8 @@ import CUDSS: CUDSS_ALG_DEFAULT, CUDSS_ALG_1, CUDSS_ALG_2, CUDSS_ALG_3
 
 @info("CUDSS_INSTALLATION : $(CUDSS.CUDSS_INSTALLATION)")
 
+Random.seed!(666)  # Random tests are diabolical
+
 include("test_cudss.jl")
 
 @testset "CUDSS" begin
diff --git a/test/test_cudss.jl b/test/test_cudss.jl
index 528767c..c9dd184 100644
--- a/test/test_cudss.jl
+++ b/test/test_cudss.jl
@@ -113,79 +113,90 @@ function cudss_execution()
   @testset "precision = $T" for T in (Float32, Float64, ComplexF32, ComplexF64)
     R = real(T)
     @testset "Unsymmetric -- Non-Hermitian" begin
-      A_cpu = sprand(T, n, n, 0.02) + I
-      x_cpu = zeros(T, n)
-      b_cpu = rand(T, n)
+      @testset "Pivoting = $pivot" for pivot in ('C', 'R', 'N')
+        A_cpu = sprand(T, n, n, 0.02) + I
+        x_cpu = zeros(T, n)
+        b_cpu = rand(T, n)
 
-      A_gpu = CuSparseMatrixCSR(A_cpu)
-      x_gpu = CuVector(x_cpu)
-      b_gpu = CuVector(b_cpu)
+        A_gpu = CuSparseMatrixCSR(A_cpu)
+        x_gpu = CuVector(x_cpu)
+        b_gpu = CuVector(b_cpu)
 
-      matrix = CudssMatrix(A_gpu, "G", 'F')
-      config = CudssConfig()
-      data = CudssData()
-      solver = CudssSolver(matrix, config, data)
+        matrix = CudssMatrix(A_gpu, "G", 'F')
+        config = CudssConfig()
+        data = CudssData()
+        solver = CudssSolver(matrix, config, data)
+        cudss_set(solver, "pivot_type", pivot)
 
-      cudss("analysis", solver, x_gpu, b_gpu)
-      cudss("factorization", solver, x_gpu, b_gpu)
-      cudss("solve", solver, x_gpu, b_gpu)
+        cudss("analysis", solver, x_gpu, b_gpu)
+        cudss("factorization", solver, x_gpu, b_gpu)
+        cudss("solve", solver, x_gpu, b_gpu)
 
-      r_gpu = b_gpu - A_gpu * x_gpu
-      @test norm(r_gpu) ≤ √eps(R)
+        r_gpu = b_gpu - A_gpu * x_gpu
+        @test norm(r_gpu) ≤ √eps(R)
+      end
     end
 
-    @testset "view = $view" for view in ('L', 'U', 'F')
-      @testset "Symmetric -- Hermitian" begin
-        A_cpu = sprand(T, n, n, 0.01) + I
-        A_cpu = A_cpu + A_cpu'
-        X_cpu = zeros(T, n, p)
-        B_cpu = rand(T, n, p)
+    symmetric_hermitian_pivots = T <: Real ? ('C', 'R', 'N') : ('N',)
+    @testset "Symmetric -- Hermitian" begin
+      @testset "view = $view" for view in ('F',)
+        @testset "Pivoting = $pivot" for pivot in symmetric_hermitian_pivots
+          A_cpu = sprand(T, n, n, 0.01) + I
+          A_cpu = A_cpu + A_cpu'
+          X_cpu = zeros(T, n, p)
+          B_cpu = rand(T, n, p)
 
-        (view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril))
-        (view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu))
-        (view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu))
-        X_gpu = CuMatrix(X_cpu)
-        B_gpu = CuMatrix(B_cpu)
+          (view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril))
+          (view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu))
+          (view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu))
+          X_gpu = CuMatrix(X_cpu)
+          B_gpu = CuMatrix(B_cpu)
 
-        structure = T <: Real ? "S" : "H"
-        matrix = CudssMatrix(A_gpu, structure, view)
-        config = CudssConfig()
-        data = CudssData()
-        solver = CudssSolver(matrix, config, data)
-        (structure == "H") && cudss_set(solver, "pivot_type", 'N')
+          structure = T <: Real ? "S" : "H"
+          matrix = CudssMatrix(A_gpu, structure, view)
+          config = CudssConfig()
+          data = CudssData()
+          solver = CudssSolver(matrix, config, data)
+          cudss_set(solver, "pivot_type", pivot)
 
-        cudss("analysis", solver, X_gpu, B_gpu)
-        cudss("factorization", solver, X_gpu, B_gpu)
-        cudss("solve", solver, X_gpu, B_gpu)
+          cudss("analysis", solver, X_gpu, B_gpu)
+          cudss("factorization", solver, X_gpu, B_gpu)
+          cudss("solve", solver, X_gpu, B_gpu)
 
-        R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu
-        @test norm(R_gpu) ≤ √eps(R)
+          R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu
+          @test norm(R_gpu) ≤ √eps(R)
+        end
       end
+    end
 
-      @testset "SPD -- HPD" begin
-        A_cpu = sprand(T, n, n, 0.01)
-        A_cpu = A_cpu * A_cpu' + I
-        X_cpu = zeros(T, n, p)
-        B_cpu = rand(T, n, p)
+    @testset "SPD -- HPD" begin
+      @testset "view = $view" for view in ('F',)
+        @testset "Pivoting = $pivot" for pivot in ('C', 'R', 'N')
+          A_cpu = sprand(T, n, n, 0.01)
+          A_cpu = A_cpu * A_cpu' + I
+          X_cpu = zeros(T, n, p)
+          B_cpu = rand(T, n, p)
 
-        (view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril))
-        (view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu))
-        (view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu))
-        X_gpu = CuMatrix(X_cpu)
-        B_gpu = CuMatrix(B_cpu)
+          (view == 'L') && (A_gpu = CuSparseMatrixCSR(A_cpu |> tril))
+          (view == 'U') && (A_gpu = CuSparseMatrixCSR(A_cpu |> triu))
+          (view == 'F') && (A_gpu = CuSparseMatrixCSR(A_cpu))
+          X_gpu = CuMatrix(X_cpu)
+          B_gpu = CuMatrix(B_cpu)
 
-        structure = T <: Real ? "SPD" : "HPD"
-        matrix = CudssMatrix(A_gpu, structure, view)
-        config = CudssConfig()
-        data = CudssData()
-        solver = CudssSolver(matrix, config, data)
+          structure = T <: Real ? "SPD" : "HPD"
+          matrix = CudssMatrix(A_gpu, structure, view)
+          config = CudssConfig()
+          data = CudssData()
+          solver = CudssSolver(matrix, config, data)
+          cudss_set(solver, "pivot_type", pivot)
 
-        cudss("analysis", solver, X_gpu, B_gpu)
-        cudss("factorization", solver, X_gpu, B_gpu)
-        cudss("solve", solver, X_gpu, B_gpu)
+          cudss("analysis", solver, X_gpu, B_gpu)
+          cudss("factorization", solver, X_gpu, B_gpu)
+          cudss("solve", solver, X_gpu, B_gpu)
 
-        R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu
-        @test norm(R_gpu) ≤ √eps(R)
+          R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu
+          @test norm(R_gpu) ≤ √eps(R)
+        end
       end
     end
   end
@@ -234,8 +245,8 @@ function cudss_generic()
       end
     end
 
-    @testset "view = $view" for view in ('F',)
-      @testset "Symmetric -- Hermitian" begin
+    @testset "Symmetric -- Hermitian" begin
+      @testset "view = $view" for view in ('F',)
         A_cpu = sprand(T, n, n, 0.01) + I
         A_cpu = A_cpu + A_cpu'
         B_cpu = rand(T, n, p)
@@ -281,8 +292,10 @@ function cudss_generic()
           @test norm(R_gpu2) ≤ √eps(R)
         end
       end
+    end
 
-      @testset "SPD -- HPD" begin
+    @testset "SPD -- HPD" begin
+      @testset "view = $view" for view in ('F',)
         A_cpu = sprand(T, n, n, 0.01)
         A_cpu = A_cpu * A_cpu' + I
         B_cpu = rand(T, n, p)