From 9cfadfc9fcb501cf38cce346d2585e2c85f3d241 Mon Sep 17 00:00:00 2001
From: Christopher Rackauckas
Date: Fri, 19 Apr 2024 09:29:33 -0400
Subject: [PATCH 1/4] Symbolic Factorization Reuse in the standard LUFactorization

This should be sufficient for CUDSS.jl to be optimally used as well
---
 src/LinearSolve.jl          |  3 +++
 src/factorization.jl        | 46 +++++++++++++++++++++++++++----------
 src/factorization_sparse.jl |  6 +++++
 3 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/src/LinearSolve.jl b/src/LinearSolve.jl
index e274521c9..bcbc1aa74 100644
--- a/src/LinearSolve.jl
+++ b/src/LinearSolve.jl
@@ -82,6 +82,9 @@ _isidentity_struct(::SciMLOperators.IdentityOperator) = true
 # Dispatch Friendly way to check if an extension is loaded
 __is_extension_loaded(::Val) = false
 
+# Check if a sparsity pattern has changed; this generic fallback assumes it has not
+pattern_changed(fact, A) = false
+
 function _fast_sym_givens! end
 
 # Code
diff --git a/src/factorization.jl b/src/factorization.jl
index 98c431d9e..7e6a2b2b1 100644
--- a/src/factorization.jl
+++ b/src/factorization.jl
@@ -49,10 +49,15 @@ Julia's built in `lu`. Equivalent to calling `lu!(A)`
   - pivot: The choice of pivoting. Defaults to `LinearAlgebra.RowMaximum()`. The other choice is
     `LinearAlgebra.NoPivot()`.
 """
-struct LUFactorization{P} <: AbstractFactorization
-    pivot::P
+Base.@kwdef struct LUFactorization{P} <: AbstractFactorization
+    pivot::P = LinearAlgebra.RowMaximum()
+    reuse_symbolic::Bool = true
+    check_pattern::Bool = true # Check factorization re-use
 end
 
+# Legacy dispatch
+LUFactorization(pivot) = LUFactorization(; pivot = pivot)
+
 """
 `GenericLUFactorization(pivot=LinearAlgebra.RowMaximum())`
 
@@ -69,10 +74,33 @@ struct GenericLUFactorization{P} <: AbstractFactorization
     pivot::P
 end
 
-LUFactorization() = LUFactorization(RowMaximum())
-
 GenericLUFactorization() = GenericLUFactorization(RowMaximum())
 
+function SciMLBase.solve!(cache::LinearCache, alg::LUFactorization; kwargs...)
+    A = cache.A
+    A = convert(AbstractMatrix, A)
+    if cache.isfresh
+        cacheval = @get_cacheval(cache, :LUFactorization)
+        if A isa AbstractSparseMatrix && alg.reuse_symbolic
+            # Caches the symbolic factorization: https://github.com/JuliaLang/julia/pull/33738
+            # If SparseMatrixCSC, check if the pattern has changed
+            if alg.check_pattern && pattern_changed(cacheval, A)
+                fact = lu(A, check = false)
+            else
+                fact = lu!(cacheval, A, check = false)
+            end
+        else
+            fact = lu(A, check = false)
+        end
+        cache.cacheval = fact
+        cache.isfresh = false
+    end
+
+    F = @get_cacheval(cache, :LUFactorization)
+    y = ldiv!(cache.u, F, cache.b)
+    SciMLBase.build_linear_solution(alg, y, nothing, cache)
+end
+
 function do_factorization(alg::LUFactorization, A, b, u)
     A = convert(AbstractMatrix, A)
     if A isa AbstractSparseMatrixCSC
@@ -775,10 +803,7 @@ function SciMLBase.solve!(cache::LinearCache, alg::UMFPACKFactorization; kwargs.
         cacheval = @get_cacheval(cache, :UMFPACKFactorization)
         if alg.reuse_symbolic
             # Caches the symbolic factorization: https://github.com/JuliaLang/julia/pull/33738
-            if alg.check_pattern && !(SparseArrays.decrement(SparseArrays.getcolptr(A)) ==
-               cacheval.colptr &&
-               SparseArrays.decrement(SparseArrays.getrowval(A)) ==
-               cacheval.rowval)
+            if alg.check_pattern && pattern_changed(cacheval, A)
                 fact = lu(
                     SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A),
                         nonzeros(A)),
@@ -856,10 +881,7 @@ function SciMLBase.solve!(cache::LinearCache, alg::KLUFactorization; kwargs...)
     if cache.isfresh
         cacheval = @get_cacheval(cache, :KLUFactorization)
         if alg.reuse_symbolic
-            if alg.check_pattern && !(SparseArrays.decrement(SparseArrays.getcolptr(A)) ==
-               cacheval.colptr &&
-               SparseArrays.decrement(SparseArrays.getrowval(A)) ==
-               cacheval.rowval)
+            if alg.check_pattern && pattern_changed(cacheval, A)
                 fact = KLU.klu(
                     SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A),
                         nonzeros(A)),
diff --git a/src/factorization_sparse.jl b/src/factorization_sparse.jl
index 7c2ddb156..995ca90a8 100644
--- a/src/factorization_sparse.jl
+++ b/src/factorization_sparse.jl
@@ -27,3 +27,9 @@ function _ldiv!(::SVector,
         b::SVector)
     (A \ b)
 end
+
+function pattern_changed(fact, A::SparseArrays.SparseMatrixCSC)
+    !(SparseArrays.decrement(SparseArrays.getcolptr(A)) ==
+      fact.colptr && SparseArrays.decrement(SparseArrays.getrowval(A)) ==
+                     fact.rowval)
+end
\ No newline at end of file

From a152f8a53c3e1c9caea8e0bb1dab1028480c2b3e Mon Sep 17 00:00:00 2001
From: Christopher Rackauckas
Date: Fri, 19 Apr 2024 12:56:29 -0400
Subject: [PATCH 2/4] fix dispatch for static

---
 src/factorization.jl | 2759 +++++++++++++++++++++---------------------
 1 file changed, 1383 insertions(+), 1376 deletions(-)

diff --git a/src/factorization.jl b/src/factorization.jl
index 7e6a2b2b1..31d52738f 100644
--- a/src/factorization.jl
+++ b/src/factorization.jl
@@ -1,1376 +1,1383 @@
-macro get_cacheval(cache, algsym)
-    quote
-        if $(esc(cache)).alg isa DefaultLinearSolver
-            getfield($(esc(cache)).cacheval, $algsym)
-        else
-            $(esc(cache)).cacheval
-        end
-    end
-end
-
-_ldiv!(x, A, b) = ldiv!(x, A, b)
-
-_ldiv!(x, A, b::SVector) = (x .= A \ b)
-_ldiv!(::SVector, A, b::SVector) = (A \ b)
-_ldiv!(::SVector, A, b) = (A \ b)
-
-function _ldiv!(x::Vector, A::Factorization, b::Vector)
-    # workaround https://github.com/JuliaLang/julia/issues/43507
-    # Fallback if working with non-square matrices
-    length(x) != length(b) && return ldiv!(x, A, b)
-    copyto!(x, b)
-    ldiv!(A, x)
-end
-
-# RF Bad fallback: will fail if `A` is just a stand-in
-# This should instead just create the factorization type.
-function init_cacheval(alg::AbstractFactorization, A, b, u, Pl, Pr, maxiters::Int, abstol,
-        reltol, verbose::Bool, assumptions::OperatorAssumptions)
-    do_factorization(alg, convert(AbstractMatrix, A), b, u)
-end
-
-## LU Factorizations
-
-"""
-`LUFactorization(pivot=LinearAlgebra.RowMaximum())`
-
-Julia's built in `lu`. Equivalent to calling `lu!(A)`
-
-  - On dense matrices, this uses the current BLAS implementation of the user's computer,
-    which by default is OpenBLAS but will use MKL if the user does `using MKL` in their
-    system.
-  - On sparse matrices, this will use UMFPACK from SparseArrays. Note that this will not
-    cache the symbolic factorization.
-  - On CuMatrix, it will use a CUDA-accelerated LU from CuSolver.
-  - On BandedMatrix and BlockBandedMatrix, it will use a banded LU.
-
-## Positional Arguments
-
-  - pivot: The choice of pivoting. Defaults to `LinearAlgebra.RowMaximum()`. The other choice is
-    `LinearAlgebra.NoPivot()`.
-"""
-Base.@kwdef struct LUFactorization{P} <: AbstractFactorization
-    pivot::P = LinearAlgebra.RowMaximum()
-    reuse_symbolic::Bool = true
-    check_pattern::Bool = true # Check factorization re-use
-end
-
-# Legacy dispatch
-LUFactorization(pivot) = LUFactorization(; pivot = pivot)
-
-"""
-`GenericLUFactorization(pivot=LinearAlgebra.RowMaximum())`
-
-Julia's built in generic LU factorization. Equivalent to calling LinearAlgebra.generic_lufact!.
-Supports arbitrary number types but does not achieve as good scaling as BLAS-based LU implementations. -Has low overhead and is good for small matrices. - -## Positional Arguments - - - pivot: The choice of pivoting. Defaults to `LinearAlgebra.RowMaximum()`. The other choice is - `LinearAlgebra.NoPivot()`. -""" -struct GenericLUFactorization{P} <: AbstractFactorization - pivot::P -end - -GenericLUFactorization() = GenericLUFactorization(RowMaximum()) - -function SciMLBase.solve!(cache::LinearCache, alg::LUFactorization; kwargs...) - A = cache.A - A = convert(AbstractMatrix, A) - if cache.isfresh - cacheval = @get_cacheval(cache, :LUFactorization) - if A isa AbstractSparseMatrix && alg.reuse_symbolic - # Caches the symbolic factorization: https://github.com/JuliaLang/julia/pull/33738 - # If SparseMatrixCSC, check if the pattern has changed - if alg.check_pattern && pattern_changed(cacheval, A) - fact = lu(A, check = false) - else - fact = lu!(cacheval, A, check = false) - end - else - fact = lu(A, check = false) - end - cache.cacheval = fact - cache.isfresh = false - end - - F = @get_cacheval(cache, :LUFactorization) - y = ldiv!(cache.u, F, cache.b) - SciMLBase.build_linear_solution(alg, y, nothing, cache) -end - -function do_factorization(alg::LUFactorization, A, b, u) - A = convert(AbstractMatrix, A) - if A isa AbstractSparseMatrixCSC - return lu(SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), nonzeros(A)), - check = false) - elseif A isa GPUArraysCore.AnyGPUArray - fact = lu(A; check = false) - elseif !ArrayInterface.can_setindex(typeof(A)) - fact = lu(A, alg.pivot, check = false) - else - fact = lu!(A, alg.pivot, check = false) - end - return fact -end - -function do_factorization(alg::GenericLUFactorization, A, b, u) - A = convert(AbstractMatrix, A) - fact = LinearAlgebra.generic_lufact!(A, alg.pivot, check = false) - return fact -end - -function init_cacheval( - alg::Union{LUFactorization, GenericLUFactorization}, A, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - ArrayInterface.lu_instance(convert(AbstractMatrix, A)) -end - -function init_cacheval(alg::Union{LUFactorization, GenericLUFactorization}, - A::Union{<:Adjoint, <:Transpose}, b, u, Pl, Pr, maxiters::Int, abstol, reltol, - verbose::Bool, assumptions::OperatorAssumptions) - if alg isa LUFactorization - return lu(A; check = false) - else - A isa GPUArraysCore.AnyGPUArray && return nothing - return LinearAlgebra.generic_lufact!(copy(A), alg.pivot; check = false) - end -end - -const PREALLOCATED_LU = ArrayInterface.lu_instance(rand(1, 1)) - -function init_cacheval(alg::Union{LUFactorization, GenericLUFactorization}, - A::Matrix{Float64}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - PREALLOCATED_LU -end - -function init_cacheval(alg::Union{LUFactorization, GenericLUFactorization}, - A::AbstractSciMLOperator, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - nothing -end - -## QRFactorization - -""" -`QRFactorization(pivot=LinearAlgebra.NoPivot(),blocksize=16)` - -Julia's built in `qr`. Equivalent to calling `qr!(A)`. - - - On dense matrices, this uses the current BLAS implementation of the user's computer - which by default is OpenBLAS but will use MKL if the user does `using MKL` in their - system. - - On sparse matrices, this will use SPQR from SparseArrays - - On CuMatrix, it will use a CUDA-accelerated QR from CuSolver. 
-  - On BandedMatrix and BlockBandedMatrix, it will use a banded QR.
-"""
-struct QRFactorization{P} <: AbstractFactorization
-    pivot::P
-    blocksize::Int
-    inplace::Bool
-end
-
-QRFactorization(inplace = true) = QRFactorization(NoPivot(), 16, inplace)
-
-function QRFactorization(pivot::LinearAlgebra.PivotingStrategy, inplace::Bool = true)
-    QRFactorization(pivot, 16, inplace)
-end
-
-function do_factorization(alg::QRFactorization, A, b, u)
-    A = convert(AbstractMatrix, A)
-    if ArrayInterface.can_setindex(typeof(A))
-        if alg.inplace && !(A isa SparseMatrixCSC) && !(A isa GPUArraysCore.AnyGPUArray)
-            fact = qr!(A, alg.pivot)
-        else
-            fact = qr(A) # CUDA.jl does not allow other args!
-        end
-    else
-        fact = qr(A, alg.pivot)
-    end
-    return fact
-end
-
-function init_cacheval(alg::QRFactorization, A, b, u, Pl, Pr,
-        maxiters::Int, abstol, reltol, verbose::Bool,
-        assumptions::OperatorAssumptions)
-    ArrayInterface.qr_instance(convert(AbstractMatrix, A), alg.pivot)
-end
-
-const PREALLOCATED_QR_ColumnNorm = ArrayInterface.qr_instance(rand(1, 1), ColumnNorm())
-
-function init_cacheval(alg::QRFactorization{ColumnNorm}, A::Matrix{Float64}, b, u, Pl, Pr,
-        maxiters::Int, abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
-    return PREALLOCATED_QR_ColumnNorm
-end
-
-function init_cacheval(
-        alg::QRFactorization, A::Union{<:Adjoint, <:Transpose}, b, u, Pl, Pr,
-        maxiters::Int, abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
-    A isa GPUArraysCore.AnyGPUArray && return qr(A)
-    return qr(A, alg.pivot)
-end
-
-const PREALLOCATED_QR_NoPivot = ArrayInterface.qr_instance(rand(1, 1))
-
-function init_cacheval(alg::QRFactorization{NoPivot}, A::Matrix{Float64}, b, u, Pl, Pr,
-        maxiters::Int, abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
-    return PREALLOCATED_QR_NoPivot
-end
-
-function init_cacheval(alg::QRFactorization, A::AbstractSciMLOperator, b, u, Pl, Pr,
-        maxiters::Int, abstol, reltol, verbose::Bool,
-        assumptions::OperatorAssumptions)
-    nothing
-end
-
-## CholeskyFactorization
-
-"""
-`CholeskyFactorization(; pivot = nothing, tol = 0.0, shift = 0.0, perm = nothing)`
-
-Julia's built in `cholesky`. Equivalent to calling `cholesky!(A)`.
-
-## Keyword Arguments
-
-  - pivot: defaults to `NoPivot()`, can also be `RowMaximum()`.
-  - tol: the tol argument in CHOLMOD. Only used for sparse matrices.
-  - shift: the shift argument in CHOLMOD. Only used for sparse matrices.
-  - perm: the perm argument in CHOLMOD. Only used for sparse matrices.
-""" -struct CholeskyFactorization{P, P2} <: AbstractFactorization - pivot::P - tol::Int - shift::Float64 - perm::P2 -end - -function CholeskyFactorization(; pivot = nothing, tol = 0.0, shift = 0.0, perm = nothing) - pivot === nothing && (pivot = NoPivot()) - CholeskyFactorization(pivot, 16, shift, perm) -end - -function do_factorization(alg::CholeskyFactorization, A, b, u) - A = convert(AbstractMatrix, A) - if A isa SparseMatrixCSC - fact = cholesky(A; shift = alg.shift, check = false, perm = alg.perm) - elseif A isa GPUArraysCore.AnyGPUArray - fact = cholesky(A; check = false) - elseif alg.pivot === Val(false) || alg.pivot === NoPivot() - fact = cholesky!(A, alg.pivot; check = false) - else - fact = cholesky!(A, alg.pivot; tol = alg.tol, check = false) - end - return fact -end - -function init_cacheval(alg::CholeskyFactorization, A::SMatrix{S1, S2}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) where {S1, S2} - cholesky(A) -end - -function init_cacheval(alg::CholeskyFactorization, A::GPUArraysCore.AnyGPUArray, b, u, Pl, - Pr, maxiters::Int, abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - cholesky(A; check = false) -end - -function init_cacheval(alg::CholeskyFactorization, A, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - ArrayInterface.cholesky_instance(convert(AbstractMatrix, A), alg.pivot) -end - -const PREALLOCATED_CHOLESKY = ArrayInterface.cholesky_instance(rand(1, 1), NoPivot()) - -function init_cacheval(alg::CholeskyFactorization, A::Matrix{Float64}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - PREALLOCATED_CHOLESKY -end - -function init_cacheval(alg::CholeskyFactorization, - A::Union{Diagonal, AbstractSciMLOperator}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - nothing -end - -## LDLtFactorization - -struct LDLtFactorization{T} <: AbstractFactorization - shift::Float64 - perm::T -end - -function LDLtFactorization(shift = 0.0, perm = nothing) - LDLtFactorization(shift, perm) -end - -function do_factorization(alg::LDLtFactorization, A, b, u) - A = convert(AbstractMatrix, A) - if !(A isa SparseMatrixCSC) - fact = ldlt!(A) - else - fact = ldlt!(A, shift = alg.shift, perm = alg.perm) - end - return fact -end - -function init_cacheval(alg::LDLtFactorization, A, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, - verbose::Bool, assumptions::OperatorAssumptions) - nothing -end - -function init_cacheval(alg::LDLtFactorization, A::SymTridiagonal, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - ArrayInterface.ldlt_instance(convert(AbstractMatrix, A)) -end - -## SVDFactorization - -""" -`SVDFactorization(full=false,alg=LinearAlgebra.DivideAndConquer())` - -Julia's built in `svd`. Equivalent to `svd!(A)`. - - - On dense matrices, this uses the current BLAS implementation of the user's computer - which by default is OpenBLAS but will use MKL if the user does `using MKL` in their - system. 
-""" -struct SVDFactorization{A} <: AbstractFactorization - full::Bool - alg::A -end - -SVDFactorization() = SVDFactorization(false, LinearAlgebra.DivideAndConquer()) - -function do_factorization(alg::SVDFactorization, A, b, u) - A = convert(AbstractMatrix, A) - if ArrayInterface.can_setindex(typeof(A)) - fact = svd!(A; alg.full, alg.alg) - else - fact = svd(A; alg.full) - end - return fact -end - -function init_cacheval(alg::SVDFactorization, A::Union{Matrix, SMatrix}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - ArrayInterface.svd_instance(convert(AbstractMatrix, A)) -end - -const PREALLOCATED_SVD = ArrayInterface.svd_instance(rand(1, 1)) - -function init_cacheval(alg::SVDFactorization, A::Matrix{Float64}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - PREALLOCATED_SVD -end - -function init_cacheval(alg::SVDFactorization, A, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - nothing -end - -## BunchKaufmanFactorization - -""" -`BunchKaufmanFactorization(; rook = false)` - -Julia's built in `bunchkaufman`. Equivalent to calling `bunchkaufman(A)`. -Only for Symmetric matrices. - -## Keyword Arguments - - - rook: whether to perform rook pivoting. Defaults to false. -""" -Base.@kwdef struct BunchKaufmanFactorization <: AbstractFactorization - rook::Bool = false -end - -function do_factorization(alg::BunchKaufmanFactorization, A, b, u) - A = convert(AbstractMatrix, A) - fact = bunchkaufman!(A, alg.rook; check = false) - return fact -end - -function init_cacheval(alg::BunchKaufmanFactorization, A::Symmetric{<:Number, <:Matrix}, b, - u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - ArrayInterface.bunchkaufman_instance(convert(AbstractMatrix, A)) -end - -const PREALLOCATED_BUNCHKAUFMAN = ArrayInterface.bunchkaufman_instance(Symmetric(rand(1, - 1))) - -function init_cacheval(alg::BunchKaufmanFactorization, - A::Symmetric{Float64, Matrix{Float64}}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - PREALLOCATED_BUNCHKAUFMAN -end - -function init_cacheval(alg::BunchKaufmanFactorization, A, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - nothing -end - -## GenericFactorization - -""" -`GenericFactorization(;fact_alg=LinearAlgebra.factorize)`: Constructs a linear solver from a generic -factorization algorithm `fact_alg` which complies with the Base.LinearAlgebra -factorization API. Quoting from Base: - - * If `A` is upper or lower triangular (or diagonal), no factorization of `A` is - required. The system is then solved with either forward or backward substitution. - For non-triangular square matrices, an LU factorization is used. - For rectangular `A` the result is the minimum-norm least squares solution computed by a - pivoted QR factorization of `A` and a rank estimate of `A` based on the R factor. - When `A` is sparse, a similar polyalgorithm is used. For indefinite matrices, the `LDLt` - factorization does not use pivoting during the numerical factorization and therefore the - procedure can fail even for invertible matrices. - -## Keyword Arguments - - - fact_alg: the factorization algorithm to use. 
Defaults to `LinearAlgebra.factorize`, but can be - swapped to choices like `lu`, `qr` -""" -struct GenericFactorization{F} <: AbstractFactorization - fact_alg::F -end - -GenericFactorization(; fact_alg = LinearAlgebra.factorize) = GenericFactorization(fact_alg) - -function do_factorization(alg::GenericFactorization, A, b, u) - A = convert(AbstractMatrix, A) - fact = alg.fact_alg(A) - return fact -end - -function init_cacheval( - alg::GenericFactorization{typeof(lu)}, A::AbstractMatrix, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - ArrayInterface.lu_instance(A) -end -function init_cacheval( - alg::GenericFactorization{typeof(lu!)}, A::AbstractMatrix, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - ArrayInterface.lu_instance(A) -end - -function init_cacheval(alg::GenericFactorization{typeof(lu)}, - A::StridedMatrix{<:LinearAlgebra.BlasFloat}, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - ArrayInterface.lu_instance(A) -end -function init_cacheval(alg::GenericFactorization{typeof(lu!)}, - A::StridedMatrix{<:LinearAlgebra.BlasFloat}, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - ArrayInterface.lu_instance(A) -end -function init_cacheval(alg::GenericFactorization{typeof(lu)}, A::Diagonal, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - Diagonal(inv.(A.diag)) -end -function init_cacheval(alg::GenericFactorization{typeof(lu)}, A::Tridiagonal, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - ArrayInterface.lu_instance(A) -end -function init_cacheval(alg::GenericFactorization{typeof(lu!)}, A::Diagonal, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - Diagonal(inv.(A.diag)) -end -function init_cacheval( - alg::GenericFactorization{typeof(lu!)}, A::Tridiagonal, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - ArrayInterface.lu_instance(A) -end -function init_cacheval( - alg::GenericFactorization{typeof(lu!)}, A::SymTridiagonal{T, V}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) where {T, V} - LinearAlgebra.LDLt{T, SymTridiagonal{T, V}}(A) -end -function init_cacheval( - alg::GenericFactorization{typeof(lu)}, A::SymTridiagonal{T, V}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) where {T, V} - LinearAlgebra.LDLt{T, SymTridiagonal{T, V}}(A) -end - -function init_cacheval( - alg::GenericFactorization{typeof(qr)}, A::AbstractMatrix, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - ArrayInterface.qr_instance(A) -end -function init_cacheval( - alg::GenericFactorization{typeof(qr!)}, A::AbstractMatrix, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - ArrayInterface.qr_instance(A) -end -function init_cacheval( - alg::GenericFactorization{typeof(qr)}, A::SymTridiagonal{T, V}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) where {T, V} - LinearAlgebra.LDLt{T, SymTridiagonal{T, V}}(A) -end -function init_cacheval( - alg::GenericFactorization{typeof(qr!)}, A::SymTridiagonal{T, V}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - 
assumptions::OperatorAssumptions) where {T, V} - LinearAlgebra.LDLt{T, SymTridiagonal{T, V}}(A) -end - -function init_cacheval(alg::GenericFactorization{typeof(qr)}, - A::StridedMatrix{<:LinearAlgebra.BlasFloat}, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - ArrayInterface.qr_instance(A) -end -function init_cacheval(alg::GenericFactorization{typeof(qr!)}, - A::StridedMatrix{<:LinearAlgebra.BlasFloat}, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - ArrayInterface.qr_instance(A) -end -function init_cacheval(alg::GenericFactorization{typeof(qr)}, A::Diagonal, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - Diagonal(inv.(A.diag)) -end -function init_cacheval(alg::GenericFactorization{typeof(qr)}, A::Tridiagonal, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - ArrayInterface.qr_instance(A) -end -function init_cacheval(alg::GenericFactorization{typeof(qr!)}, A::Diagonal, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - Diagonal(inv.(A.diag)) -end -function init_cacheval( - alg::GenericFactorization{typeof(qr!)}, A::Tridiagonal, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - ArrayInterface.qr_instance(A) -end - -function init_cacheval( - alg::GenericFactorization{typeof(svd)}, A::AbstractMatrix, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - ArrayInterface.svd_instance(A) -end -function init_cacheval( - alg::GenericFactorization{typeof(svd!)}, A::AbstractMatrix, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - ArrayInterface.svd_instance(A) -end - -function init_cacheval(alg::GenericFactorization{typeof(svd)}, - A::StridedMatrix{<:LinearAlgebra.BlasFloat}, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - ArrayInterface.svd_instance(A) -end -function init_cacheval(alg::GenericFactorization{typeof(svd!)}, - A::StridedMatrix{<:LinearAlgebra.BlasFloat}, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - ArrayInterface.svd_instance(A) -end -function init_cacheval(alg::GenericFactorization{typeof(svd)}, A::Diagonal, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - Diagonal(inv.(A.diag)) -end -function init_cacheval( - alg::GenericFactorization{typeof(svd)}, A::Tridiagonal, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - ArrayInterface.svd_instance(A) -end -function init_cacheval(alg::GenericFactorization{typeof(svd!)}, A::Diagonal, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - Diagonal(inv.(A.diag)) -end -function init_cacheval(alg::GenericFactorization{typeof(svd!)}, A::Tridiagonal, b, u, Pl, - Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - ArrayInterface.svd_instance(A) -end -function init_cacheval( - alg::GenericFactorization{typeof(svd!)}, A::SymTridiagonal{T, V}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) where {T, V} - LinearAlgebra.LDLt{T, SymTridiagonal{T, V}}(A) -end -function init_cacheval( - alg::GenericFactorization{typeof(svd)}, 
A::SymTridiagonal{T, V}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) where {T, V} - LinearAlgebra.LDLt{T, SymTridiagonal{T, V}}(A) -end - -function init_cacheval(alg::GenericFactorization, A::Diagonal, b, u, Pl, Pr, maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - Diagonal(inv.(A.diag)) -end -function init_cacheval(alg::GenericFactorization, A::Tridiagonal, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - ArrayInterface.lu_instance(A) -end -function init_cacheval(alg::GenericFactorization, A::SymTridiagonal{T, V}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) where {T, V} - LinearAlgebra.LDLt{T, SymTridiagonal{T, V}}(A) -end -function init_cacheval(alg::GenericFactorization, A, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - init_cacheval(alg, convert(AbstractMatrix, A), b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) -end -function init_cacheval(alg::GenericFactorization, A::AbstractMatrix, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - do_factorization(alg, A, b, u) -end - -function init_cacheval( - alg::Union{GenericFactorization{typeof(bunchkaufman!)}, - GenericFactorization{typeof(bunchkaufman)}}, - A::Union{Hermitian, Symmetric}, b, u, Pl, Pr, maxiters::Int, abstol, - reltol, verbose::Bool, assumptions::OperatorAssumptions) - BunchKaufman(A.data, Array(1:size(A, 1)), A.uplo, true, false, 0) -end - -function init_cacheval( - alg::Union{GenericFactorization{typeof(bunchkaufman!)}, - GenericFactorization{typeof(bunchkaufman)}}, - A::StridedMatrix{<:LinearAlgebra.BlasFloat}, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - if eltype(A) <: Complex - return bunchkaufman!(Hermitian(A)) - else - return bunchkaufman!(Symmetric(A)) - end -end - -# Fallback, tries to make nonsingular and just factorizes -# Try to never use it. 
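
[Editor's note, not part of the patch: the long run of `init_cacheval` methods above
exists so that `GenericFactorization` can pre-allocate a concretely typed factorization
object for each structured matrix type before the first solve. A minimal sketch of how
this algorithm is used; per the docstring above, `fact_alg` may be any function that
follows the `LinearAlgebra.factorize` API:

    using LinearAlgebra, LinearSolve

    A = rand(4, 4)
    b = rand(4)
    prob = LinearProblem(A, b)

    sol_lu = solve(prob, GenericFactorization(; fact_alg = lu))
    sol_qr = solve(prob, GenericFactorization(; fact_alg = qr))
]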
- -# Cholesky needs the posdef matrix, for GenericFactorization assume structure is needed -function init_cacheval( - alg::GenericFactorization{typeof(cholesky)}, A::AbstractMatrix, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - newA = copy(convert(AbstractMatrix, A)) - do_factorization(alg, newA, b, u) -end -function init_cacheval( - alg::GenericFactorization{typeof(cholesky!)}, A::AbstractMatrix, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - newA = copy(convert(AbstractMatrix, A)) - do_factorization(alg, newA, b, u) -end -function init_cacheval(alg::GenericFactorization{typeof(cholesky!)}, - A::Diagonal, b, u, Pl, Pr, maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - Diagonal(inv.(A.diag)) -end -function init_cacheval( - alg::GenericFactorization{typeof(cholesky!)}, A::Tridiagonal, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - ArrayInterface.lu_instance(A) -end -function init_cacheval( - alg::GenericFactorization{typeof(cholesky!)}, A::SymTridiagonal{T, V}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) where {T, V} - LinearAlgebra.LDLt{T, SymTridiagonal{T, V}}(A) -end -function init_cacheval(alg::GenericFactorization{typeof(cholesky)}, - A::Diagonal, b, u, Pl, Pr, maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - Diagonal(inv.(A.diag)) -end -function init_cacheval( - alg::GenericFactorization{typeof(cholesky)}, A::Tridiagonal, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - ArrayInterface.lu_instance(A) -end -function init_cacheval( - alg::GenericFactorization{typeof(cholesky)}, A::SymTridiagonal{T, V}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) where {T, V} - LinearAlgebra.LDLt{T, SymTridiagonal{T, V}}(A) -end - -function init_cacheval(alg::GenericFactorization, - A::Union{Hermitian{T, <:SparseMatrixCSC}, - Symmetric{T, <:SparseMatrixCSC}}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) where {T} - newA = copy(convert(AbstractMatrix, A)) - do_factorization(alg, newA, b, u) -end - -# Ambiguity handling dispatch - -################################## Factorizations which require solve! overloads - -""" -`UMFPACKFactorization(;reuse_symbolic=true, check_pattern=true)` - -A fast sparse multithreaded LU-factorization which specializes on sparsity -patterns with “more structure”. - -!!! note - - By default, the SparseArrays.jl are implemented for efficiency by caching the - symbolic factorization. I.e., if `set_A` is used, it is expected that the new - `A` has the same sparsity pattern as the previous `A`. If this algorithm is to - be used in a context where that assumption does not hold, set `reuse_symbolic=false`. 
-""" -Base.@kwdef struct UMFPACKFactorization <: AbstractFactorization - reuse_symbolic::Bool = true - check_pattern::Bool = true # Check factorization re-use -end - -const PREALLOCATED_UMFPACK = SparseArrays.UMFPACK.UmfpackLU(SparseMatrixCSC(0, 0, [1], - Int[], Float64[])) - -function init_cacheval(alg::UMFPACKFactorization, - A, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, - verbose::Bool, assumptions::OperatorAssumptions) - nothing -end - -function init_cacheval(alg::UMFPACKFactorization, A::SparseMatrixCSC{Float64, Int}, b, u, - Pl, Pr, - maxiters::Int, abstol, reltol, - verbose::Bool, assumptions::OperatorAssumptions) - PREALLOCATED_UMFPACK -end - -function init_cacheval(alg::UMFPACKFactorization, A::AbstractSparseArray, b, u, Pl, Pr, - maxiters::Int, abstol, - reltol, - verbose::Bool, assumptions::OperatorAssumptions) - A = convert(AbstractMatrix, A) - zerobased = SparseArrays.getcolptr(A)[1] == 0 - return SparseArrays.UMFPACK.UmfpackLU(SparseMatrixCSC(size(A)..., getcolptr(A), - rowvals(A), nonzeros(A))) -end - -function SciMLBase.solve!(cache::LinearCache, alg::UMFPACKFactorization; kwargs...) - A = cache.A - A = convert(AbstractMatrix, A) - if cache.isfresh - cacheval = @get_cacheval(cache, :UMFPACKFactorization) - if alg.reuse_symbolic - # Caches the symbolic factorization: https://github.com/JuliaLang/julia/pull/33738 - if alg.check_pattern && pattern_changed(cacheval, A) - fact = lu( - SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), - nonzeros(A)), - check = false) - else - fact = lu!(cacheval, - SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), - nonzeros(A)), check = false) - end - else - fact = lu(SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), nonzeros(A)), - check = false) - end - cache.cacheval = fact - cache.isfresh = false - end - - F = @get_cacheval(cache, :UMFPACKFactorization) - if F.status == SparseArrays.UMFPACK.UMFPACK_OK - y = ldiv!(cache.u, F, cache.b) - SciMLBase.build_linear_solution(alg, y, nothing, cache) - else - SciMLBase.build_linear_solution( - alg, cache.u, nothing, cache; retcode = ReturnCode.Infeasible) - end -end - -""" -`KLUFactorization(;reuse_symbolic=true, check_pattern=true)` - -A fast sparse LU-factorization which specializes on sparsity patterns with “less structure”. - -!!! note - - By default, the SparseArrays.jl are implemented for efficiency by caching the - symbolic factorization. I.e., if `set_A` is used, it is expected that the new - `A` has the same sparsity pattern as the previous `A`. If this algorithm is to - be used in a context where that assumption does not hold, set `reuse_symbolic=false`. 
-""" -Base.@kwdef struct KLUFactorization <: AbstractFactorization - reuse_symbolic::Bool = true - check_pattern::Bool = true -end - -const PREALLOCATED_KLU = KLU.KLUFactorization(SparseMatrixCSC(0, 0, [1], Int[], - Float64[])) - -function init_cacheval(alg::KLUFactorization, - A, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, - verbose::Bool, assumptions::OperatorAssumptions) - nothing -end - -function init_cacheval(alg::KLUFactorization, A::SparseMatrixCSC{Float64, Int}, b, u, Pl, - Pr, - maxiters::Int, abstol, reltol, - verbose::Bool, assumptions::OperatorAssumptions) - PREALLOCATED_KLU -end - -function init_cacheval(alg::KLUFactorization, A::AbstractSparseArray, b, u, Pl, Pr, - maxiters::Int, abstol, - reltol, - verbose::Bool, assumptions::OperatorAssumptions) - A = convert(AbstractMatrix, A) - return KLU.KLUFactorization(SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), - nonzeros(A))) -end - -# TODO: guard this against errors -function SciMLBase.solve!(cache::LinearCache, alg::KLUFactorization; kwargs...) - A = cache.A - A = convert(AbstractMatrix, A) - if cache.isfresh - cacheval = @get_cacheval(cache, :KLUFactorization) - if alg.reuse_symbolic - if alg.check_pattern && pattern_changed(cacheval, A) - fact = KLU.klu( - SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), - nonzeros(A)), - check = false) - else - fact = KLU.klu!(cacheval, nonzeros(A), check = false) - end - else - # New fact each time since the sparsity pattern can change - # and thus it needs to reallocate - fact = KLU.klu(SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), - nonzeros(A))) - end - cache.cacheval = fact - cache.isfresh = false - end - F = @get_cacheval(cache, :KLUFactorization) - if F.common.status == KLU.KLU_OK - y = ldiv!(cache.u, F, cache.b) - SciMLBase.build_linear_solution(alg, y, nothing, cache) - else - SciMLBase.build_linear_solution( - alg, cache.u, nothing, cache; retcode = ReturnCode.Infeasible) - end -end - -## CHOLMODFactorization - -""" -`CHOLMODFactorization(; shift = 0.0, perm = nothing)` - -A wrapper of CHOLMOD's polyalgorithm, mixing Cholesky factorization and ldlt. -Tries cholesky for performance and retries ldlt if conditioning causes Cholesky -to fail. - -Only supports sparse matrices. - -## Keyword Arguments - - - shift: the shift argument in CHOLMOD. - - perm: the perm argument in CHOLMOD -""" -Base.@kwdef struct CHOLMODFactorization{T} <: AbstractFactorization - shift::Float64 = 0.0 - perm::T = nothing -end - -const PREALLOCATED_CHOLMOD = cholesky(SparseMatrixCSC(0, 0, [1], Int[], Float64[])) - -function init_cacheval(alg::CHOLMODFactorization, - A, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, - verbose::Bool, assumptions::OperatorAssumptions) - nothing -end - -function init_cacheval(alg::CHOLMODFactorization, - A::Union{SparseMatrixCSC{T, Int}, Symmetric{T, SparseMatrixCSC{T, Int}}}, b, u, - Pl, Pr, - maxiters::Int, abstol, reltol, - verbose::Bool, assumptions::OperatorAssumptions) where {T <: - Union{Float32, Float64}} - PREALLOCATED_CHOLMOD -end - -function SciMLBase.solve!(cache::LinearCache, alg::CHOLMODFactorization; kwargs...) 
- A = cache.A - A = convert(AbstractMatrix, A) - - if cache.isfresh - cacheval = @get_cacheval(cache, :CHOLMODFactorization) - fact = cholesky(A; check = false) - if !LinearAlgebra.issuccess(fact) - ldlt!(fact, A; check = false) - end - cache.cacheval = fact - cache.isfresh = false - end - - cache.u .= @get_cacheval(cache, :CHOLMODFactorization) \ cache.b - SciMLBase.build_linear_solution(alg, cache.u, nothing, cache) -end - -## RFLUFactorization - -""" -`RFLUFactorization()` - -A fast pure Julia LU-factorization implementation -using RecursiveFactorization.jl. This is by far the fastest LU-factorization -implementation, usually outperforming OpenBLAS and MKL for smaller matrices -(<500x500), but currently optimized only for Base `Array` with `Float32` or `Float64`. -Additional optimization for complex matrices is in the works. -""" -struct RFLUFactorization{P, T} <: AbstractFactorization - RFLUFactorization(::Val{P}, ::Val{T}) where {P, T} = new{P, T}() -end - -function RFLUFactorization(; pivot = Val(true), thread = Val(true)) - RFLUFactorization(pivot, thread) -end - -function init_cacheval(alg::RFLUFactorization, A, b, u, Pl, Pr, maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - ipiv = Vector{LinearAlgebra.BlasInt}(undef, min(size(A)...)) - ArrayInterface.lu_instance(convert(AbstractMatrix, A)), ipiv -end - -function init_cacheval(alg::RFLUFactorization, A::Matrix{Float64}, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - ipiv = Vector{LinearAlgebra.BlasInt}(undef, 0) - PREALLOCATED_LU, ipiv -end - -function init_cacheval(alg::RFLUFactorization, - A::Union{AbstractSparseArray, AbstractSciMLOperator}, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - nothing, nothing -end - -function init_cacheval(alg::RFLUFactorization, - A::Union{Diagonal, SymTridiagonal, Tridiagonal}, b, u, Pl, Pr, - maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - nothing, nothing -end - -function SciMLBase.solve!(cache::LinearCache, alg::RFLUFactorization{P, T}; - kwargs...) where {P, T} - A = cache.A - A = convert(AbstractMatrix, A) - fact, ipiv = @get_cacheval(cache, :RFLUFactorization) - if cache.isfresh - if length(ipiv) != min(size(A)...) - ipiv = Vector{LinearAlgebra.BlasInt}(undef, min(size(A)...)) - end - fact = RecursiveFactorization.lu!(A, ipiv, Val(P), Val(T), check = false) - cache.cacheval = (fact, ipiv) - cache.isfresh = false - end - y = ldiv!(cache.u, @get_cacheval(cache, :RFLUFactorization)[1], cache.b) - SciMLBase.build_linear_solution(alg, y, nothing, cache) -end - -## NormalCholeskyFactorization - -""" -`NormalCholeskyFactorization(pivot = RowMaximum())` - -A fast factorization which uses a Cholesky factorization on A * A'. Can be much -faster than LU factorization, but is not as numerically stable and thus should only -be applied to well-conditioned matrices. 
- -## Positional Arguments - - - pivot: Defaults to RowMaximum(), but can be NoPivot() -""" -struct NormalCholeskyFactorization{P} <: AbstractFactorization - pivot::P -end - -function NormalCholeskyFactorization(; pivot = nothing) - pivot === nothing && (pivot = NoPivot()) - NormalCholeskyFactorization(pivot) -end - -default_alias_A(::NormalCholeskyFactorization, ::Any, ::Any) = true -default_alias_b(::NormalCholeskyFactorization, ::Any, ::Any) = true - -const PREALLOCATED_NORMALCHOLESKY = ArrayInterface.cholesky_instance(rand(1, 1), NoPivot()) - -function init_cacheval(alg::NormalCholeskyFactorization, - A::Union{AbstractSparseArray, GPUArraysCore.AnyGPUArray, - Symmetric{<:Number, <:AbstractSparseArray}}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - ArrayInterface.cholesky_instance(convert(AbstractMatrix, A)) -end - -function init_cacheval(alg::NormalCholeskyFactorization, A::SMatrix, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - return cholesky(Symmetric((A)' * A)) -end - -function init_cacheval(alg::NormalCholeskyFactorization, A, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - A_ = convert(AbstractMatrix, A) - return ArrayInterface.cholesky_instance( - Symmetric(Matrix{eltype(A)}(undef, 0, 0)), alg.pivot) -end - -const PREALLOCATED_NORMALCHOLESKY_SYMMETRIC = ArrayInterface.cholesky_instance( - Symmetric(rand(1, 1)), NoPivot()) - -function init_cacheval(alg::NormalCholeskyFactorization, A::Matrix{Float64}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - return PREALLOCATED_NORMALCHOLESKY_SYMMETRIC -end - -function init_cacheval(alg::NormalCholeskyFactorization, - A::Union{Diagonal, AbstractSciMLOperator}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - nothing -end - -function SciMLBase.solve!(cache::LinearCache, alg::NormalCholeskyFactorization; kwargs...) - A = cache.A - A = convert(AbstractMatrix, A) - if cache.isfresh - if A isa SparseMatrixCSC || A isa GPUArraysCore.AnyGPUArray || A isa SMatrix - fact = cholesky(Symmetric((A)' * A); check = false) - else - fact = cholesky(Symmetric((A)' * A), alg.pivot; check = false) - end - cache.cacheval = fact - cache.isfresh = false - end - if A isa SparseMatrixCSC - cache.u .= @get_cacheval(cache, :NormalCholeskyFactorization) \ (A' * cache.b) - y = cache.u - elseif A isa StaticArray - cache.u = @get_cacheval(cache, :NormalCholeskyFactorization) \ (A' * cache.b) - y = cache.u - else - y = ldiv!(cache.u, @get_cacheval(cache, :NormalCholeskyFactorization), A' * cache.b) - end - SciMLBase.build_linear_solution(alg, y, nothing, cache) -end - -## NormalBunchKaufmanFactorization - -""" -`NormalBunchKaufmanFactorization(rook = false)` - -A fast factorization which uses a BunchKaufman factorization on A * A'. Can be much -faster than LU factorization, but is not as numerically stable and thus should only -be applied to well-conditioned matrices. - -## Positional Arguments - - - rook: whether to perform rook pivoting. Defaults to false. 
-""" -struct NormalBunchKaufmanFactorization <: AbstractFactorization - rook::Bool -end - -function NormalBunchKaufmanFactorization(; rook = false) - NormalBunchKaufmanFactorization(rook) -end - -default_alias_A(::NormalBunchKaufmanFactorization, ::Any, ::Any) = true -default_alias_b(::NormalBunchKaufmanFactorization, ::Any, ::Any) = true - -function init_cacheval(alg::NormalBunchKaufmanFactorization, A, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - ArrayInterface.bunchkaufman_instance(convert(AbstractMatrix, A)) -end - -function SciMLBase.solve!(cache::LinearCache, alg::NormalBunchKaufmanFactorization; - kwargs...) - A = cache.A - A = convert(AbstractMatrix, A) - if cache.isfresh - fact = bunchkaufman(Symmetric((A)' * A), alg.rook) - cache.cacheval = fact - cache.isfresh = false - end - y = ldiv!(cache.u, @get_cacheval(cache, :NormalBunchKaufmanFactorization), A' * cache.b) - SciMLBase.build_linear_solution(alg, y, nothing, cache) -end - -## DiagonalFactorization - -""" -`DiagonalFactorization()` - -A special implementation only for solving `Diagonal` matrices fast. -""" -struct DiagonalFactorization <: AbstractFactorization end - -function init_cacheval(alg::DiagonalFactorization, A, b, u, Pl, Pr, maxiters::Int, - abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - nothing -end - -function SciMLBase.solve!(cache::LinearCache, alg::DiagonalFactorization; - kwargs...) - A = convert(AbstractMatrix, cache.A) - if cache.u isa Vector && cache.b isa Vector - @simd ivdep for i in eachindex(cache.u) - cache.u[i] = A.diag[i] \ cache.b[i] - end - else - cache.u .= A.diag .\ cache.b - end - SciMLBase.build_linear_solution(alg, cache.u, nothing, cache) -end - -## FastLAPACKFactorizations - -struct WorkspaceAndFactors{W, F} - workspace::W - factors::F -end - -# There's no options like pivot here. -# But I'm not sure it makes sense as a GenericFactorization -# since it just uses `LAPACK.getrf!`. -""" -`FastLUFactorization()` - -The FastLapackInterface.jl version of the LU factorization. Notably, -this version does not allow for choice of pivoting method. -""" -struct FastLUFactorization <: AbstractFactorization end - -function init_cacheval(::FastLUFactorization, A, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - ws = LUWs(A) - return WorkspaceAndFactors(ws, ArrayInterface.lu_instance(convert(AbstractMatrix, A))) -end - -function SciMLBase.solve!(cache::LinearCache, alg::FastLUFactorization; kwargs...) - A = cache.A - A = convert(AbstractMatrix, A) - ws_and_fact = @get_cacheval(cache, :FastLUFactorization) - if cache.isfresh - # we will fail here if A is a different *size* than in a previous version of the same cache. - # it may instead be desirable to resize the workspace. - @set! ws_and_fact.factors = LinearAlgebra.LU(LAPACK.getrf!(ws_and_fact.workspace, - A)...) - cache.cacheval = ws_and_fact - cache.isfresh = false - end - y = ldiv!(cache.u, cache.cacheval.factors, cache.b) - SciMLBase.build_linear_solution(alg, y, nothing, cache) -end - -""" -`FastQRFactorization()` - -The FastLapackInterface.jl version of the QR factorization. -""" -struct FastQRFactorization{P} <: AbstractFactorization - pivot::P - blocksize::Int -end - -# is 36 or 16 better here? LinearAlgebra and FastLapackInterface use 36, -# but QRFactorization uses 16. 
-FastQRFactorization() = FastQRFactorization(NoPivot(), 36) - -function init_cacheval(alg::FastQRFactorization{NoPivot}, A::AbstractMatrix, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - ws = QRWYWs(A; blocksize = alg.blocksize) - return WorkspaceAndFactors(ws, - ArrayInterface.qr_instance(convert(AbstractMatrix, A))) -end -function init_cacheval(::FastQRFactorization{ColumnNorm}, A::AbstractMatrix, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - ws = QRpWs(A) - return WorkspaceAndFactors(ws, - ArrayInterface.qr_instance(convert(AbstractMatrix, A))) -end - -function init_cacheval(alg::FastQRFactorization, A, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - return init_cacheval(alg, convert(AbstractMatrix, A), b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) -end - -function SciMLBase.solve!(cache::LinearCache, alg::FastQRFactorization{P}; - kwargs...) where {P} - A = cache.A - A = convert(AbstractMatrix, A) - ws_and_fact = @get_cacheval(cache, :FastQRFactorization) - if cache.isfresh - # we will fail here if A is a different *size* than in a previous version of the same cache. - # it may instead be desirable to resize the workspace. - if P === NoPivot - @set! ws_and_fact.factors = LinearAlgebra.QRCompactWY(LAPACK.geqrt!( - ws_and_fact.workspace, - A)...) - else - @set! ws_and_fact.factors = LinearAlgebra.QRPivoted(LAPACK.geqp3!( - ws_and_fact.workspace, - A)...) - end - cache.cacheval = ws_and_fact - cache.isfresh = false - end - y = ldiv!(cache.u, cache.cacheval.factors, cache.b) - SciMLBase.build_linear_solution(alg, y, nothing, cache) -end - -## SparspakFactorization is here since it's MIT licensed, not GPL - -""" -`SparspakFactorization(reuse_symbolic = true)` - -This is the translation of the well-known sparse matrix software Sparspak -(Waterloo Sparse Matrix Package), solving -large sparse systems of linear algebraic equations. Sparspak is composed of the -subroutines from the book "Computer Solution of Large Sparse Positive Definite -Systems" by Alan George and Joseph Liu. Originally written in Fortran 77, later -rewritten in Fortran 90. Here is the software translated into Julia. - -The Julia rewrite is released under the MIT license with an express permission -from the authors of the Fortran package. The package uses multiple -dispatch to route around standard BLAS routines in the case e.g. of arbitrary-precision -floating point numbers or ForwardDiff.Dual. -This e.g. allows for Automatic Differentiation (AD) of a sparse-matrix solve. 
-""" -Base.@kwdef struct SparspakFactorization <: AbstractFactorization - reuse_symbolic::Bool = true -end - -const PREALLOCATED_SPARSEPAK = sparspaklu(SparseMatrixCSC(0, 0, [1], Int[], Float64[]), - factorize = false) - -function init_cacheval(alg::SparspakFactorization, - A::Union{Matrix, Nothing, AbstractSciMLOperator}, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, - verbose::Bool, assumptions::OperatorAssumptions) - nothing -end - -function init_cacheval(::SparspakFactorization, A::SparseMatrixCSC{Float64, Int}, b, u, Pl, - Pr, maxiters::Int, abstol, - reltol, - verbose::Bool, assumptions::OperatorAssumptions) - PREALLOCATED_SPARSEPAK -end - -function init_cacheval(::SparspakFactorization, A, b, u, Pl, Pr, maxiters::Int, abstol, - reltol, - verbose::Bool, assumptions::OperatorAssumptions) - A = convert(AbstractMatrix, A) - if A isa SparseArrays.AbstractSparseArray - return sparspaklu( - SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), - nonzeros(A)), - factorize = false) - else - return sparspaklu(SparseMatrixCSC(0, 0, [1], Int[], eltype(A)[]), - factorize = false) - end -end - -function init_cacheval(::SparspakFactorization, ::StaticArray, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - nothing -end - -function SciMLBase.solve!(cache::LinearCache, alg::SparspakFactorization; kwargs...) - A = cache.A - if cache.isfresh - if cache.cacheval !== nothing && alg.reuse_symbolic - fact = sparspaklu!(@get_cacheval(cache, :SparspakFactorization), - SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), - nonzeros(A))) - else - fact = sparspaklu(SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), - nonzeros(A))) - end - cache.cacheval = fact - cache.isfresh = false - end - y = ldiv!(cache.u, @get_cacheval(cache, :SparspakFactorization), cache.b) - SciMLBase.build_linear_solution(alg, y, nothing, cache) -end - -for alg in InteractiveUtils.subtypes(AbstractFactorization) - @eval function init_cacheval(alg::$alg, A::MatrixOperator, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - init_cacheval(alg, A.A, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - end -end +macro get_cacheval(cache, algsym) + quote + if $(esc(cache)).alg isa DefaultLinearSolver + getfield($(esc(cache)).cacheval, $algsym) + else + $(esc(cache)).cacheval + end + end +end + +_ldiv!(x, A, b) = ldiv!(x, A, b) + +_ldiv!(x, A, b::SVector) = (x .= A \ b) +_ldiv!(::SVector, A, b::SVector) = (A \ b) +_ldiv!(::SVector, A, b) = (A \ b) + +function _ldiv!(x::Vector, A::Factorization, b::Vector) + # workaround https://github.com/JuliaLang/julia/issues/43507 + # Fallback if working with non-square matrices + length(x) != length(b) && return ldiv!(x, A, b) + copyto!(x, b) + ldiv!(A, x) +end + +# RF Bad fallback: will fail if `A` is just a stand-in +# This should instead just create the factorization type. +function init_cacheval(alg::AbstractFactorization, A, b, u, Pl, Pr, maxiters::Int, abstol, + reltol, verbose::Bool, assumptions::OperatorAssumptions) + do_factorization(alg, convert(AbstractMatrix, A), b, u) +end + +## LU Factorizations + +""" +`LUFactorization(pivot=LinearAlgebra.RowMaximum())` + +Julia's built in `lu`. Equivalent to calling `lu!(A)` + + - On dense matrices, this uses the current BLAS implementation of the user's computer, + which by default is OpenBLAS but will use MKL if the user does `using MKL` in their + system. 
+  - On sparse matrices, this will use UMFPACK from SparseArrays, and the symbolic
+    factorization is cached and reused when the sparsity pattern is unchanged.
+  - On CuMatrix, it will use a CUDA-accelerated LU from CuSolver.
+  - On BandedMatrix and BlockBandedMatrix, it will use a banded LU.
+
+## Positional Arguments
+
+  - pivot: The choice of pivoting. Defaults to `LinearAlgebra.RowMaximum()`. The other choice is
+    `LinearAlgebra.NoPivot()`.
+
+## Keyword Arguments
+
+  - reuse_symbolic: if `A` is sparse, reuse the cached symbolic factorization when only
+    the values of `A` have changed. Defaults to true.
+  - check_pattern: check that the sparsity pattern of a new `A` matches the cached one
+    before reusing the symbolic factorization. Defaults to true.
+"""
+Base.@kwdef struct LUFactorization{P} <: AbstractFactorization
+    pivot::P = LinearAlgebra.RowMaximum()
+    reuse_symbolic::Bool = true
+    check_pattern::Bool = true # Check factorization re-use
+end
+
+# Legacy dispatch
+LUFactorization(pivot) = LUFactorization(; pivot = pivot)
+
+"""
+`GenericLUFactorization(pivot=LinearAlgebra.RowMaximum())`
+
+Julia's built in generic LU factorization. Equivalent to calling `LinearAlgebra.generic_lufact!`.
+Supports arbitrary number types but does not achieve as good scaling as BLAS-based LU implementations.
+Has low overhead and is good for small matrices.
+
+## Positional Arguments
+
+  - pivot: The choice of pivoting. Defaults to `LinearAlgebra.RowMaximum()`. The other choice is
+    `LinearAlgebra.NoPivot()`.
+"""
+struct GenericLUFactorization{P} <: AbstractFactorization
+    pivot::P
+end
+
+GenericLUFactorization() = GenericLUFactorization(RowMaximum())
+
+function SciMLBase.solve!(cache::LinearCache, alg::LUFactorization; kwargs...)
+    A = cache.A
+    A = convert(AbstractMatrix, A)
+    if cache.isfresh
+        cacheval = @get_cacheval(cache, :LUFactorization)
+        if A isa AbstractSparseMatrix && alg.reuse_symbolic
+            # Caches the symbolic factorization: https://github.com/JuliaLang/julia/pull/33738
+            # If SparseMatrixCSC, check if the pattern has changed
+            if alg.check_pattern && pattern_changed(cacheval, A)
+                fact = lu(A, check = false)
+            else
+                fact = lu!(cacheval, A, check = false)
+            end
+        else
+            fact = lu(A, check = false)
+        end
+        cache.cacheval = fact
+        cache.isfresh = false
+    end
+
+    F = @get_cacheval(cache, :LUFactorization)
+    y = _ldiv!(cache.u, F, cache.b)
+    SciMLBase.build_linear_solution(alg, y, nothing, cache)
+end
+
+function do_factorization(alg::LUFactorization, A, b, u)
+    A = convert(AbstractMatrix, A)
+    if A isa AbstractSparseMatrixCSC
+        return lu(SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), nonzeros(A)),
+            check = false)
+    elseif A isa GPUArraysCore.AnyGPUArray
+        fact = lu(A; check = false)
+    elseif !ArrayInterface.can_setindex(typeof(A))
+        fact = lu(A, alg.pivot, check = false)
+    else
+        fact = lu!(A, alg.pivot, check = false)
+    end
+    return fact
+end
+
+function do_factorization(alg::GenericLUFactorization, A, b, u)
+    A = convert(AbstractMatrix, A)
+    fact = LinearAlgebra.generic_lufact!(A, alg.pivot, check = false)
+    return fact
+end
+
+function init_cacheval(
+        alg::Union{LUFactorization, GenericLUFactorization}, A, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions)
+    ArrayInterface.lu_instance(convert(AbstractMatrix, A))
+end
+
+function init_cacheval(alg::Union{LUFactorization, GenericLUFactorization},
+        A::Union{<:Adjoint, <:Transpose}, b, u, Pl, Pr, maxiters::Int, abstol, reltol,
+        verbose::Bool, assumptions::OperatorAssumptions)
+    if alg isa LUFactorization
+        return lu(A; check = false)
+    else
+        A isa GPUArraysCore.AnyGPUArray && return nothing
+        return LinearAlgebra.generic_lufact!(copy(A), alg.pivot; check = false)
+    end
+end
+
+const PREALLOCATED_LU = ArrayInterface.lu_instance(rand(1, 1))
+
+function init_cacheval(alg::Union{LUFactorization, GenericLUFactorization},
+        A::Matrix{Float64}, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions)
+    PREALLOCATED_LU
+end
+
+function init_cacheval(alg::Union{LUFactorization, GenericLUFactorization},
+        A::AbstractSciMLOperator, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions)
+    nothing
+end
+
+## QRFactorization
+
+"""
+`QRFactorization(pivot=LinearAlgebra.NoPivot(),blocksize=16)`
+
+Julia's built in `qr`. Equivalent to calling `qr!(A)`.
+
+  - On dense matrices, this uses the current BLAS implementation of the user's computer
+    which by default is OpenBLAS but will use MKL if the user does `using MKL` in their
+    system.
+  - On sparse matrices, this will use SPQR from SparseArrays
+  - On CuMatrix, it will use a CUDA-accelerated QR from CuSolver.
+  - On BandedMatrix and BlockBandedMatrix, it will use a banded QR.
+"""
+struct QRFactorization{P} <: AbstractFactorization
+    pivot::P
+    blocksize::Int
+    inplace::Bool
+end
+
+QRFactorization(inplace = true) = QRFactorization(NoPivot(), 16, inplace)
+
+function QRFactorization(pivot::LinearAlgebra.PivotingStrategy, inplace::Bool = true)
+    QRFactorization(pivot, 16, inplace)
+end
+
+function do_factorization(alg::QRFactorization, A, b, u)
+    A = convert(AbstractMatrix, A)
+    if ArrayInterface.can_setindex(typeof(A))
+        if alg.inplace && !(A isa SparseMatrixCSC) && !(A isa GPUArraysCore.AnyGPUArray)
+            fact = qr!(A, alg.pivot)
+        else
+            fact = qr(A) # CUDA.jl does not allow other args!
+        end
+    else
+        fact = qr(A, alg.pivot)
+    end
+    return fact
+end
+
+function init_cacheval(alg::QRFactorization, A, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions)
+    ArrayInterface.qr_instance(convert(AbstractMatrix, A), alg.pivot)
+end
+
+const PREALLOCATED_QR_ColumnNorm = ArrayInterface.qr_instance(rand(1, 1), ColumnNorm())
+
+function init_cacheval(alg::QRFactorization{ColumnNorm}, A::Matrix{Float64}, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
+    return PREALLOCATED_QR_ColumnNorm
+end
+
+function init_cacheval(
+        alg::QRFactorization, A::Union{<:Adjoint, <:Transpose}, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
+    A isa GPUArraysCore.AnyGPUArray && return qr(A)
+    return qr(A, alg.pivot)
+end
+
+const PREALLOCATED_QR_NoPivot = ArrayInterface.qr_instance(rand(1, 1))
+
+function init_cacheval(alg::QRFactorization{NoPivot}, A::Matrix{Float64}, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
+    return PREALLOCATED_QR_NoPivot
+end
+
+function init_cacheval(alg::QRFactorization, A::AbstractSciMLOperator, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions)
+    nothing
+end
+
+## CholeskyFactorization
+
+"""
+`CholeskyFactorization(; pivot = nothing, tol = 0.0, shift = 0.0, perm = nothing)`
+
+Julia's built in `cholesky`. Equivalent to calling `cholesky!(A)`.
+
+## Keyword Arguments
+
+  - pivot: defaults to `NoPivot()`, can also be `RowMaximum()`.
+  - tol: the tol argument in CHOLMOD. Only used for sparse matrices.
+  - shift: the shift argument in CHOLMOD. Only used for sparse matrices.
+  - perm: the perm argument in CHOLMOD. Only used for sparse matrices.
+""" +struct CholeskyFactorization{P, P2} <: AbstractFactorization + pivot::P + tol::Int + shift::Float64 + perm::P2 +end + +function CholeskyFactorization(; pivot = nothing, tol = 0.0, shift = 0.0, perm = nothing) + pivot === nothing && (pivot = NoPivot()) + CholeskyFactorization(pivot, 16, shift, perm) +end + +function do_factorization(alg::CholeskyFactorization, A, b, u) + A = convert(AbstractMatrix, A) + if A isa SparseMatrixCSC + fact = cholesky(A; shift = alg.shift, check = false, perm = alg.perm) + elseif A isa GPUArraysCore.AnyGPUArray + fact = cholesky(A; check = false) + elseif alg.pivot === Val(false) || alg.pivot === NoPivot() + fact = cholesky!(A, alg.pivot; check = false) + else + fact = cholesky!(A, alg.pivot; tol = alg.tol, check = false) + end + return fact +end + +function init_cacheval(alg::CholeskyFactorization, A::SMatrix{S1, S2}, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) where {S1, S2} + cholesky(A) +end + +function init_cacheval(alg::CholeskyFactorization, A::GPUArraysCore.AnyGPUArray, b, u, Pl, + Pr, maxiters::Int, abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) + cholesky(A; check = false) +end + +function init_cacheval(alg::CholeskyFactorization, A, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) + ArrayInterface.cholesky_instance(convert(AbstractMatrix, A), alg.pivot) +end + +const PREALLOCATED_CHOLESKY = ArrayInterface.cholesky_instance(rand(1, 1), NoPivot()) + +function init_cacheval(alg::CholeskyFactorization, A::Matrix{Float64}, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + PREALLOCATED_CHOLESKY +end + +function init_cacheval(alg::CholeskyFactorization, + A::Union{Diagonal, AbstractSciMLOperator}, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + nothing +end + +## LDLtFactorization + +struct LDLtFactorization{T} <: AbstractFactorization + shift::Float64 + perm::T +end + +function LDLtFactorization(shift = 0.0, perm = nothing) + LDLtFactorization(shift, perm) +end + +function do_factorization(alg::LDLtFactorization, A, b, u) + A = convert(AbstractMatrix, A) + if !(A isa SparseMatrixCSC) + fact = ldlt!(A) + else + fact = ldlt!(A, shift = alg.shift, perm = alg.perm) + end + return fact +end + +function init_cacheval(alg::LDLtFactorization, A, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, + verbose::Bool, assumptions::OperatorAssumptions) + nothing +end + +function init_cacheval(alg::LDLtFactorization, A::SymTridiagonal, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + ArrayInterface.ldlt_instance(convert(AbstractMatrix, A)) +end + +## SVDFactorization + +""" +`SVDFactorization(full=false,alg=LinearAlgebra.DivideAndConquer())` + +Julia's built in `svd`. Equivalent to `svd!(A)`. + + - On dense matrices, this uses the current BLAS implementation of the user's computer + which by default is OpenBLAS but will use MKL if the user does `using MKL` in their + system. 
+""" +struct SVDFactorization{A} <: AbstractFactorization + full::Bool + alg::A +end + +SVDFactorization() = SVDFactorization(false, LinearAlgebra.DivideAndConquer()) + +function do_factorization(alg::SVDFactorization, A, b, u) + A = convert(AbstractMatrix, A) + if ArrayInterface.can_setindex(typeof(A)) + fact = svd!(A; alg.full, alg.alg) + else + fact = svd(A; alg.full) + end + return fact +end + +function init_cacheval(alg::SVDFactorization, A::Union{Matrix, SMatrix}, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + ArrayInterface.svd_instance(convert(AbstractMatrix, A)) +end + +const PREALLOCATED_SVD = ArrayInterface.svd_instance(rand(1, 1)) + +function init_cacheval(alg::SVDFactorization, A::Matrix{Float64}, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + PREALLOCATED_SVD +end + +function init_cacheval(alg::SVDFactorization, A, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + nothing +end + +## BunchKaufmanFactorization + +""" +`BunchKaufmanFactorization(; rook = false)` + +Julia's built in `bunchkaufman`. Equivalent to calling `bunchkaufman(A)`. +Only for Symmetric matrices. + +## Keyword Arguments + + - rook: whether to perform rook pivoting. Defaults to false. +""" +Base.@kwdef struct BunchKaufmanFactorization <: AbstractFactorization + rook::Bool = false +end + +function do_factorization(alg::BunchKaufmanFactorization, A, b, u) + A = convert(AbstractMatrix, A) + fact = bunchkaufman!(A, alg.rook; check = false) + return fact +end + +function init_cacheval(alg::BunchKaufmanFactorization, A::Symmetric{<:Number, <:Matrix}, b, + u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + ArrayInterface.bunchkaufman_instance(convert(AbstractMatrix, A)) +end + +const PREALLOCATED_BUNCHKAUFMAN = ArrayInterface.bunchkaufman_instance(Symmetric(rand(1, + 1))) + +function init_cacheval(alg::BunchKaufmanFactorization, + A::Symmetric{Float64, Matrix{Float64}}, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + PREALLOCATED_BUNCHKAUFMAN +end + +function init_cacheval(alg::BunchKaufmanFactorization, A, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + nothing +end + +## GenericFactorization + +""" +`GenericFactorization(;fact_alg=LinearAlgebra.factorize)`: Constructs a linear solver from a generic +factorization algorithm `fact_alg` which complies with the Base.LinearAlgebra +factorization API. Quoting from Base: + + * If `A` is upper or lower triangular (or diagonal), no factorization of `A` is + required. The system is then solved with either forward or backward substitution. + For non-triangular square matrices, an LU factorization is used. + For rectangular `A` the result is the minimum-norm least squares solution computed by a + pivoted QR factorization of `A` and a rank estimate of `A` based on the R factor. + When `A` is sparse, a similar polyalgorithm is used. For indefinite matrices, the `LDLt` + factorization does not use pivoting during the numerical factorization and therefore the + procedure can fail even for invertible matrices. + +## Keyword Arguments + + - fact_alg: the factorization algorithm to use. 
Defaults to `LinearAlgebra.factorize`, but can be
+    swapped for alternatives such as `lu` or `qr` (see the usage sketch below).
+"""
+struct GenericFactorization{F} <: AbstractFactorization
+    fact_alg::F
+end
+
+GenericFactorization(; fact_alg = LinearAlgebra.factorize) = GenericFactorization(fact_alg)
+
+function do_factorization(alg::GenericFactorization, A, b, u)
+    A = convert(AbstractMatrix, A)
+    fact = alg.fact_alg(A)
+    return fact
+end
+
+function init_cacheval(
+        alg::GenericFactorization{typeof(lu)}, A::AbstractMatrix, b, u, Pl, Pr,
+        maxiters::Int,
+        abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
+    ArrayInterface.lu_instance(A)
+end
+function init_cacheval(
+        alg::GenericFactorization{typeof(lu!)}, A::AbstractMatrix, b, u, Pl, Pr,
+        maxiters::Int,
+        abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
+    ArrayInterface.lu_instance(A)
+end
+
+function init_cacheval(alg::GenericFactorization{typeof(lu)},
+        A::StridedMatrix{<:LinearAlgebra.BlasFloat}, b, u, Pl, Pr,
+        maxiters::Int,
+        abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
+    ArrayInterface.lu_instance(A)
+end
+function init_cacheval(alg::GenericFactorization{typeof(lu!)},
+        A::StridedMatrix{<:LinearAlgebra.BlasFloat}, b, u, Pl, Pr,
+        maxiters::Int,
+        abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
+    ArrayInterface.lu_instance(A)
+end
+function init_cacheval(alg::GenericFactorization{typeof(lu)}, A::Diagonal, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions)
+    Diagonal(inv.(A.diag))
+end
+function init_cacheval(alg::GenericFactorization{typeof(lu)}, A::Tridiagonal, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions)
+    ArrayInterface.lu_instance(A)
+end
+function init_cacheval(alg::GenericFactorization{typeof(lu!)}, A::Diagonal, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions)
+    Diagonal(inv.(A.diag))
+end
+function init_cacheval(
+        alg::GenericFactorization{typeof(lu!)}, A::Tridiagonal, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions)
+    ArrayInterface.lu_instance(A)
+end
+function init_cacheval(
+        alg::GenericFactorization{typeof(lu!)}, A::SymTridiagonal{T, V}, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions) where {T, V}
+    LinearAlgebra.LDLt{T, SymTridiagonal{T, V}}(A)
+end
+function init_cacheval(
+        alg::GenericFactorization{typeof(lu)}, A::SymTridiagonal{T, V}, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions) where {T, V}
+    LinearAlgebra.LDLt{T, SymTridiagonal{T, V}}(A)
+end
+
+function init_cacheval(
+        alg::GenericFactorization{typeof(qr)}, A::AbstractMatrix, b, u, Pl, Pr,
+        maxiters::Int,
+        abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
+    ArrayInterface.qr_instance(A)
+end
+function init_cacheval(
+        alg::GenericFactorization{typeof(qr!)}, A::AbstractMatrix, b, u, Pl, Pr,
+        maxiters::Int,
+        abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
+    ArrayInterface.qr_instance(A)
+end
+function init_cacheval(
+        alg::GenericFactorization{typeof(qr)}, A::SymTridiagonal{T, V}, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions) where {T, V}
+    LinearAlgebra.LDLt{T, SymTridiagonal{T, V}}(A)
+end
+function init_cacheval(
+        alg::GenericFactorization{typeof(qr!)}, A::SymTridiagonal{T, V}, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+
assumptions::OperatorAssumptions) where {T, V} + LinearAlgebra.LDLt{T, SymTridiagonal{T, V}}(A) +end + +function init_cacheval(alg::GenericFactorization{typeof(qr)}, + A::StridedMatrix{<:LinearAlgebra.BlasFloat}, b, u, Pl, Pr, + maxiters::Int, + abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) + ArrayInterface.qr_instance(A) +end +function init_cacheval(alg::GenericFactorization{typeof(qr!)}, + A::StridedMatrix{<:LinearAlgebra.BlasFloat}, b, u, Pl, Pr, + maxiters::Int, + abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) + ArrayInterface.qr_instance(A) +end +function init_cacheval(alg::GenericFactorization{typeof(qr)}, A::Diagonal, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + Diagonal(inv.(A.diag)) +end +function init_cacheval(alg::GenericFactorization{typeof(qr)}, A::Tridiagonal, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + ArrayInterface.qr_instance(A) +end +function init_cacheval(alg::GenericFactorization{typeof(qr!)}, A::Diagonal, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + Diagonal(inv.(A.diag)) +end +function init_cacheval( + alg::GenericFactorization{typeof(qr!)}, A::Tridiagonal, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + ArrayInterface.qr_instance(A) +end + +function init_cacheval( + alg::GenericFactorization{typeof(svd)}, A::AbstractMatrix, b, u, Pl, Pr, + maxiters::Int, + abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) + ArrayInterface.svd_instance(A) +end +function init_cacheval( + alg::GenericFactorization{typeof(svd!)}, A::AbstractMatrix, b, u, Pl, Pr, + maxiters::Int, + abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) + ArrayInterface.svd_instance(A) +end + +function init_cacheval(alg::GenericFactorization{typeof(svd)}, + A::StridedMatrix{<:LinearAlgebra.BlasFloat}, b, u, Pl, Pr, + maxiters::Int, + abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) + ArrayInterface.svd_instance(A) +end +function init_cacheval(alg::GenericFactorization{typeof(svd!)}, + A::StridedMatrix{<:LinearAlgebra.BlasFloat}, b, u, Pl, Pr, + maxiters::Int, + abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) + ArrayInterface.svd_instance(A) +end +function init_cacheval(alg::GenericFactorization{typeof(svd)}, A::Diagonal, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + Diagonal(inv.(A.diag)) +end +function init_cacheval( + alg::GenericFactorization{typeof(svd)}, A::Tridiagonal, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + ArrayInterface.svd_instance(A) +end +function init_cacheval(alg::GenericFactorization{typeof(svd!)}, A::Diagonal, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + Diagonal(inv.(A.diag)) +end +function init_cacheval(alg::GenericFactorization{typeof(svd!)}, A::Tridiagonal, b, u, Pl, + Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + ArrayInterface.svd_instance(A) +end +function init_cacheval( + alg::GenericFactorization{typeof(svd!)}, A::SymTridiagonal{T, V}, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) where {T, V} + LinearAlgebra.LDLt{T, SymTridiagonal{T, V}}(A) +end +function init_cacheval( + alg::GenericFactorization{typeof(svd)}, 
A::SymTridiagonal{T, V}, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) where {T, V} + LinearAlgebra.LDLt{T, SymTridiagonal{T, V}}(A) +end + +function init_cacheval(alg::GenericFactorization, A::Diagonal, b, u, Pl, Pr, maxiters::Int, + abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) + Diagonal(inv.(A.diag)) +end +function init_cacheval(alg::GenericFactorization, A::Tridiagonal, b, u, Pl, Pr, + maxiters::Int, + abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) + ArrayInterface.lu_instance(A) +end +function init_cacheval(alg::GenericFactorization, A::SymTridiagonal{T, V}, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) where {T, V} + LinearAlgebra.LDLt{T, SymTridiagonal{T, V}}(A) +end +function init_cacheval(alg::GenericFactorization, A, b, u, Pl, Pr, + maxiters::Int, + abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) + init_cacheval(alg, convert(AbstractMatrix, A), b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) +end +function init_cacheval(alg::GenericFactorization, A::AbstractMatrix, b, u, Pl, Pr, + maxiters::Int, + abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) + do_factorization(alg, A, b, u) +end + +function init_cacheval( + alg::Union{GenericFactorization{typeof(bunchkaufman!)}, + GenericFactorization{typeof(bunchkaufman)}}, + A::Union{Hermitian, Symmetric}, b, u, Pl, Pr, maxiters::Int, abstol, + reltol, verbose::Bool, assumptions::OperatorAssumptions) + BunchKaufman(A.data, Array(1:size(A, 1)), A.uplo, true, false, 0) +end + +function init_cacheval( + alg::Union{GenericFactorization{typeof(bunchkaufman!)}, + GenericFactorization{typeof(bunchkaufman)}}, + A::StridedMatrix{<:LinearAlgebra.BlasFloat}, b, u, Pl, Pr, + maxiters::Int, + abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) + if eltype(A) <: Complex + return bunchkaufman!(Hermitian(A)) + else + return bunchkaufman!(Symmetric(A)) + end +end + +# Fallback, tries to make nonsingular and just factorizes +# Try to never use it. 
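+
+# Illustrative usage sketch for `GenericFactorization` (the random system is hypothetical;
+# any function complying with the LinearAlgebra factorization API can be passed as `fact_alg`):
+#
+#     using LinearSolve
+#     prob = LinearProblem(rand(4, 4), rand(4))
+#     sol = solve(prob, GenericFactorization(; fact_alg = qr))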
+
+# Cholesky needs a posdef matrix; for GenericFactorization, assume the structure is needed
+function init_cacheval(
+        alg::GenericFactorization{typeof(cholesky)}, A::AbstractMatrix, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions)
+    newA = copy(convert(AbstractMatrix, A))
+    do_factorization(alg, newA, b, u)
+end
+function init_cacheval(
+        alg::GenericFactorization{typeof(cholesky!)}, A::AbstractMatrix, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions)
+    newA = copy(convert(AbstractMatrix, A))
+    do_factorization(alg, newA, b, u)
+end
+function init_cacheval(alg::GenericFactorization{typeof(cholesky!)},
+        A::Diagonal, b, u, Pl, Pr, maxiters::Int,
+        abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
+    Diagonal(inv.(A.diag))
+end
+function init_cacheval(
+        alg::GenericFactorization{typeof(cholesky!)}, A::Tridiagonal, b, u, Pl, Pr,
+        maxiters::Int,
+        abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
+    ArrayInterface.lu_instance(A)
+end
+function init_cacheval(
+        alg::GenericFactorization{typeof(cholesky!)}, A::SymTridiagonal{T, V}, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions) where {T, V}
+    LinearAlgebra.LDLt{T, SymTridiagonal{T, V}}(A)
+end
+function init_cacheval(alg::GenericFactorization{typeof(cholesky)},
+        A::Diagonal, b, u, Pl, Pr, maxiters::Int,
+        abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
+    Diagonal(inv.(A.diag))
+end
+function init_cacheval(
+        alg::GenericFactorization{typeof(cholesky)}, A::Tridiagonal, b, u, Pl, Pr,
+        maxiters::Int,
+        abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
+    ArrayInterface.lu_instance(A)
+end
+function init_cacheval(
+        alg::GenericFactorization{typeof(cholesky)}, A::SymTridiagonal{T, V}, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions) where {T, V}
+    LinearAlgebra.LDLt{T, SymTridiagonal{T, V}}(A)
+end
+
+function init_cacheval(alg::GenericFactorization,
+        A::Union{Hermitian{T, <:SparseMatrixCSC},
+            Symmetric{T, <:SparseMatrixCSC}}, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions) where {T}
+    newA = copy(convert(AbstractMatrix, A))
+    do_factorization(alg, newA, b, u)
+end
+
+# Ambiguity handling dispatch
+
+################################## Factorizations which require solve! overloads
+
+"""
+`UMFPACKFactorization(;reuse_symbolic=true, check_pattern=true)`
+
+A fast sparse multithreaded LU-factorization which specializes on sparsity
+patterns with “more structure”.
+
+!!! note
+
+    By default, the SparseArrays.jl implementations cache the symbolic factorization for
+    efficiency. I.e., if `set_A` is used, it is expected that the new `A` has the same
+    sparsity pattern as the previous `A`. If this algorithm is to be used in a context
+    where that assumption does not hold, set `reuse_symbolic=false`.
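+
+## Example
+
+A minimal usage sketch on a sparse system (the random matrix is purely illustrative):
+
+    using LinearSolve, SparseArrays, LinearAlgebra
+    A = sprand(100, 100, 0.05) + I
+    prob = LinearProblem(A, rand(100))
+    sol = solve(prob, UMFPACKFactorization())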
+""" +Base.@kwdef struct UMFPACKFactorization <: AbstractFactorization + reuse_symbolic::Bool = true + check_pattern::Bool = true # Check factorization re-use +end + +const PREALLOCATED_UMFPACK = SparseArrays.UMFPACK.UmfpackLU(SparseMatrixCSC(0, 0, [1], + Int[], Float64[])) + +function init_cacheval(alg::UMFPACKFactorization, + A, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, + verbose::Bool, assumptions::OperatorAssumptions) + nothing +end + +function init_cacheval(alg::UMFPACKFactorization, A::SparseMatrixCSC{Float64, Int}, b, u, + Pl, Pr, + maxiters::Int, abstol, reltol, + verbose::Bool, assumptions::OperatorAssumptions) + PREALLOCATED_UMFPACK +end + +function init_cacheval(alg::UMFPACKFactorization, A::AbstractSparseArray, b, u, Pl, Pr, + maxiters::Int, abstol, + reltol, + verbose::Bool, assumptions::OperatorAssumptions) + A = convert(AbstractMatrix, A) + zerobased = SparseArrays.getcolptr(A)[1] == 0 + return SparseArrays.UMFPACK.UmfpackLU(SparseMatrixCSC(size(A)..., getcolptr(A), + rowvals(A), nonzeros(A))) +end + +function SciMLBase.solve!(cache::LinearCache, alg::UMFPACKFactorization; kwargs...) + A = cache.A + A = convert(AbstractMatrix, A) + if cache.isfresh + cacheval = @get_cacheval(cache, :UMFPACKFactorization) + if alg.reuse_symbolic + # Caches the symbolic factorization: https://github.com/JuliaLang/julia/pull/33738 + if alg.check_pattern && pattern_changed(cacheval, A) + fact = lu( + SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), + nonzeros(A)), + check = false) + else + fact = lu!(cacheval, + SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), + nonzeros(A)), check = false) + end + else + fact = lu(SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), nonzeros(A)), + check = false) + end + cache.cacheval = fact + cache.isfresh = false + end + + F = @get_cacheval(cache, :UMFPACKFactorization) + if F.status == SparseArrays.UMFPACK.UMFPACK_OK + y = ldiv!(cache.u, F, cache.b) + SciMLBase.build_linear_solution(alg, y, nothing, cache) + else + SciMLBase.build_linear_solution( + alg, cache.u, nothing, cache; retcode = ReturnCode.Infeasible) + end +end + +""" +`KLUFactorization(;reuse_symbolic=true, check_pattern=true)` + +A fast sparse LU-factorization which specializes on sparsity patterns with “less structure”. + +!!! note + + By default, the SparseArrays.jl are implemented for efficiency by caching the + symbolic factorization. I.e., if `set_A` is used, it is expected that the new + `A` has the same sparsity pattern as the previous `A`. If this algorithm is to + be used in a context where that assumption does not hold, set `reuse_symbolic=false`. 
+""" +Base.@kwdef struct KLUFactorization <: AbstractFactorization + reuse_symbolic::Bool = true + check_pattern::Bool = true +end + +const PREALLOCATED_KLU = KLU.KLUFactorization(SparseMatrixCSC(0, 0, [1], Int[], + Float64[])) + +function init_cacheval(alg::KLUFactorization, + A, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, + verbose::Bool, assumptions::OperatorAssumptions) + nothing +end + +function init_cacheval(alg::KLUFactorization, A::SparseMatrixCSC{Float64, Int}, b, u, Pl, + Pr, + maxiters::Int, abstol, reltol, + verbose::Bool, assumptions::OperatorAssumptions) + PREALLOCATED_KLU +end + +function init_cacheval(alg::KLUFactorization, A::AbstractSparseArray, b, u, Pl, Pr, + maxiters::Int, abstol, + reltol, + verbose::Bool, assumptions::OperatorAssumptions) + A = convert(AbstractMatrix, A) + return KLU.KLUFactorization(SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), + nonzeros(A))) +end + +# TODO: guard this against errors +function SciMLBase.solve!(cache::LinearCache, alg::KLUFactorization; kwargs...) + A = cache.A + A = convert(AbstractMatrix, A) + if cache.isfresh + cacheval = @get_cacheval(cache, :KLUFactorization) + if alg.reuse_symbolic + if alg.check_pattern && pattern_changed(cacheval, A) + fact = KLU.klu( + SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), + nonzeros(A)), + check = false) + else + fact = KLU.klu!(cacheval, nonzeros(A), check = false) + end + else + # New fact each time since the sparsity pattern can change + # and thus it needs to reallocate + fact = KLU.klu(SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), + nonzeros(A))) + end + cache.cacheval = fact + cache.isfresh = false + end + F = @get_cacheval(cache, :KLUFactorization) + if F.common.status == KLU.KLU_OK + y = ldiv!(cache.u, F, cache.b) + SciMLBase.build_linear_solution(alg, y, nothing, cache) + else + SciMLBase.build_linear_solution( + alg, cache.u, nothing, cache; retcode = ReturnCode.Infeasible) + end +end + +## CHOLMODFactorization + +""" +`CHOLMODFactorization(; shift = 0.0, perm = nothing)` + +A wrapper of CHOLMOD's polyalgorithm, mixing Cholesky factorization and ldlt. +Tries cholesky for performance and retries ldlt if conditioning causes Cholesky +to fail. + +Only supports sparse matrices. + +## Keyword Arguments + + - shift: the shift argument in CHOLMOD. + - perm: the perm argument in CHOLMOD +""" +Base.@kwdef struct CHOLMODFactorization{T} <: AbstractFactorization + shift::Float64 = 0.0 + perm::T = nothing +end + +const PREALLOCATED_CHOLMOD = cholesky(SparseMatrixCSC(0, 0, [1], Int[], Float64[])) + +function init_cacheval(alg::CHOLMODFactorization, + A, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, + verbose::Bool, assumptions::OperatorAssumptions) + nothing +end + +function init_cacheval(alg::CHOLMODFactorization, + A::Union{SparseMatrixCSC{T, Int}, Symmetric{T, SparseMatrixCSC{T, Int}}}, b, u, + Pl, Pr, + maxiters::Int, abstol, reltol, + verbose::Bool, assumptions::OperatorAssumptions) where {T <: + Union{Float32, Float64}} + PREALLOCATED_CHOLMOD +end + +function SciMLBase.solve!(cache::LinearCache, alg::CHOLMODFactorization; kwargs...) 
+    A = cache.A
+    A = convert(AbstractMatrix, A)
+
+    if cache.isfresh
+        cacheval = @get_cacheval(cache, :CHOLMODFactorization)
+        fact = cholesky(A; check = false)
+        if !LinearAlgebra.issuccess(fact)
+            ldlt!(fact, A; check = false)
+        end
+        cache.cacheval = fact
+        cache.isfresh = false
+    end
+
+    cache.u .= @get_cacheval(cache, :CHOLMODFactorization) \ cache.b
+    SciMLBase.build_linear_solution(alg, cache.u, nothing, cache)
+end
+
+## RFLUFactorization
+
+"""
+`RFLUFactorization()`
+
+A fast pure Julia LU-factorization implementation
+using RecursiveFactorization.jl. This is by far the fastest LU-factorization
+implementation, usually outperforming OpenBLAS and MKL for smaller matrices
+(<500x500), but currently optimized only for Base `Array` with `Float32` or `Float64`.
+Additional optimization for complex matrices is in the works.
+"""
+struct RFLUFactorization{P, T} <: AbstractFactorization
+    RFLUFactorization(::Val{P}, ::Val{T}) where {P, T} = new{P, T}()
+end
+
+function RFLUFactorization(; pivot = Val(true), thread = Val(true))
+    RFLUFactorization(pivot, thread)
+end
+
+function init_cacheval(alg::RFLUFactorization, A, b, u, Pl, Pr, maxiters::Int,
+        abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
+    ipiv = Vector{LinearAlgebra.BlasInt}(undef, min(size(A)...))
+    ArrayInterface.lu_instance(convert(AbstractMatrix, A)), ipiv
+end
+
+function init_cacheval(alg::RFLUFactorization, A::Matrix{Float64}, b, u, Pl, Pr,
+        maxiters::Int,
+        abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
+    ipiv = Vector{LinearAlgebra.BlasInt}(undef, 0)
+    PREALLOCATED_LU, ipiv
+end
+
+function init_cacheval(alg::RFLUFactorization,
+        A::Union{AbstractSparseArray, AbstractSciMLOperator}, b, u, Pl, Pr,
+        maxiters::Int,
+        abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
+    nothing, nothing
+end
+
+function init_cacheval(alg::RFLUFactorization,
+        A::Union{Diagonal, SymTridiagonal, Tridiagonal}, b, u, Pl, Pr,
+        maxiters::Int,
+        abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
+    nothing, nothing
+end
+
+function SciMLBase.solve!(cache::LinearCache, alg::RFLUFactorization{P, T};
+        kwargs...) where {P, T}
+    A = cache.A
+    A = convert(AbstractMatrix, A)
+    fact, ipiv = @get_cacheval(cache, :RFLUFactorization)
+    if cache.isfresh
+        if length(ipiv) != min(size(A)...)
+            ipiv = Vector{LinearAlgebra.BlasInt}(undef, min(size(A)...))
+        end
+        fact = RecursiveFactorization.lu!(A, ipiv, Val(P), Val(T), check = false)
+        cache.cacheval = (fact, ipiv)
+        cache.isfresh = false
+    end
+    y = ldiv!(cache.u, @get_cacheval(cache, :RFLUFactorization)[1], cache.b)
+    SciMLBase.build_linear_solution(alg, y, nothing, cache)
+end
+
+## NormalCholeskyFactorization
+
+"""
+`NormalCholeskyFactorization(; pivot = NoPivot())`
+
+A fast factorization which uses a Cholesky factorization of the normal equations
+matrix `A' * A`. Can be much faster than LU factorization, but is not as numerically
+stable and thus should only be applied to well-conditioned matrices.
+
+## Keyword Arguments
+
+  - pivot: Defaults to `NoPivot()`, but can be `RowMaximum()`.
+"""
+struct NormalCholeskyFactorization{P} <: AbstractFactorization
+    pivot::P
+end
+
+function NormalCholeskyFactorization(; pivot = nothing)
+    pivot === nothing && (pivot = NoPivot())
+    NormalCholeskyFactorization(pivot)
+end
+
+default_alias_A(::NormalCholeskyFactorization, ::Any, ::Any) = true
+default_alias_b(::NormalCholeskyFactorization, ::Any, ::Any) = true
+
+const PREALLOCATED_NORMALCHOLESKY = ArrayInterface.cholesky_instance(rand(1, 1), NoPivot())
+
+function init_cacheval(alg::NormalCholeskyFactorization,
+        A::Union{AbstractSparseArray, GPUArraysCore.AnyGPUArray,
+            Symmetric{<:Number, <:AbstractSparseArray}}, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions)
+    ArrayInterface.cholesky_instance(convert(AbstractMatrix, A))
+end
+
+function init_cacheval(alg::NormalCholeskyFactorization, A::SMatrix, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions)
+    return cholesky(Symmetric((A)' * A))
+end
+
+function init_cacheval(alg::NormalCholeskyFactorization, A, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions)
+    A_ = convert(AbstractMatrix, A)
+    return ArrayInterface.cholesky_instance(
+        Symmetric(Matrix{eltype(A)}(undef, 0, 0)), alg.pivot)
+end
+
+const PREALLOCATED_NORMALCHOLESKY_SYMMETRIC = ArrayInterface.cholesky_instance(
+    Symmetric(rand(1, 1)), NoPivot())
+
+function init_cacheval(alg::NormalCholeskyFactorization, A::Matrix{Float64}, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions)
+    return PREALLOCATED_NORMALCHOLESKY_SYMMETRIC
+end
+
+function init_cacheval(alg::NormalCholeskyFactorization,
+        A::Union{Diagonal, AbstractSciMLOperator}, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions)
+    nothing
+end
+
+function SciMLBase.solve!(cache::LinearCache, alg::NormalCholeskyFactorization; kwargs...)
+    A = cache.A
+    A = convert(AbstractMatrix, A)
+    if cache.isfresh
+        if A isa SparseMatrixCSC || A isa GPUArraysCore.AnyGPUArray || A isa SMatrix
+            fact = cholesky(Symmetric((A)' * A); check = false)
+        else
+            fact = cholesky(Symmetric((A)' * A), alg.pivot; check = false)
+        end
+        cache.cacheval = fact
+        cache.isfresh = false
+    end
+    if A isa SparseMatrixCSC
+        cache.u .= @get_cacheval(cache, :NormalCholeskyFactorization) \ (A' * cache.b)
+        y = cache.u
+    elseif A isa StaticArray
+        cache.u = @get_cacheval(cache, :NormalCholeskyFactorization) \ (A' * cache.b)
+        y = cache.u
+    else
+        y = ldiv!(cache.u, @get_cacheval(cache, :NormalCholeskyFactorization), A' * cache.b)
+    end
+    SciMLBase.build_linear_solution(alg, y, nothing, cache)
+end
+
+## NormalBunchKaufmanFactorization
+
+"""
+`NormalBunchKaufmanFactorization(; rook = false)`
+
+A fast factorization which uses a BunchKaufman factorization of the normal equations
+matrix `A' * A`. Can be much faster than LU factorization, but is not as numerically
+stable and thus should only be applied to well-conditioned matrices.
+
+## Keyword Arguments
+
+  - rook: whether to perform rook pivoting. Defaults to false.
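+
+## Example
+
+A minimal usage sketch via the normal equations (the well-conditioned random system
+is purely illustrative):
+
+    using LinearSolve
+    prob = LinearProblem(rand(4, 4), rand(4))
+    sol = solve(prob, NormalBunchKaufmanFactorization())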
+""" +struct NormalBunchKaufmanFactorization <: AbstractFactorization + rook::Bool +end + +function NormalBunchKaufmanFactorization(; rook = false) + NormalBunchKaufmanFactorization(rook) +end + +default_alias_A(::NormalBunchKaufmanFactorization, ::Any, ::Any) = true +default_alias_b(::NormalBunchKaufmanFactorization, ::Any, ::Any) = true + +function init_cacheval(alg::NormalBunchKaufmanFactorization, A, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + ArrayInterface.bunchkaufman_instance(convert(AbstractMatrix, A)) +end + +function SciMLBase.solve!(cache::LinearCache, alg::NormalBunchKaufmanFactorization; + kwargs...) + A = cache.A + A = convert(AbstractMatrix, A) + if cache.isfresh + fact = bunchkaufman(Symmetric((A)' * A), alg.rook) + cache.cacheval = fact + cache.isfresh = false + end + y = ldiv!(cache.u, @get_cacheval(cache, :NormalBunchKaufmanFactorization), A' * cache.b) + SciMLBase.build_linear_solution(alg, y, nothing, cache) +end + +## DiagonalFactorization + +""" +`DiagonalFactorization()` + +A special implementation only for solving `Diagonal` matrices fast. +""" +struct DiagonalFactorization <: AbstractFactorization end + +function init_cacheval(alg::DiagonalFactorization, A, b, u, Pl, Pr, maxiters::Int, + abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) + nothing +end + +function SciMLBase.solve!(cache::LinearCache, alg::DiagonalFactorization; + kwargs...) + A = convert(AbstractMatrix, cache.A) + if cache.u isa Vector && cache.b isa Vector + @simd ivdep for i in eachindex(cache.u) + cache.u[i] = A.diag[i] \ cache.b[i] + end + else + cache.u .= A.diag .\ cache.b + end + SciMLBase.build_linear_solution(alg, cache.u, nothing, cache) +end + +## FastLAPACKFactorizations + +struct WorkspaceAndFactors{W, F} + workspace::W + factors::F +end + +# There's no options like pivot here. +# But I'm not sure it makes sense as a GenericFactorization +# since it just uses `LAPACK.getrf!`. +""" +`FastLUFactorization()` + +The FastLapackInterface.jl version of the LU factorization. Notably, +this version does not allow for choice of pivoting method. +""" +struct FastLUFactorization <: AbstractFactorization end + +function init_cacheval(::FastLUFactorization, A, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + ws = LUWs(A) + return WorkspaceAndFactors(ws, ArrayInterface.lu_instance(convert(AbstractMatrix, A))) +end + +function SciMLBase.solve!(cache::LinearCache, alg::FastLUFactorization; kwargs...) + A = cache.A + A = convert(AbstractMatrix, A) + ws_and_fact = @get_cacheval(cache, :FastLUFactorization) + if cache.isfresh + # we will fail here if A is a different *size* than in a previous version of the same cache. + # it may instead be desirable to resize the workspace. + @set! ws_and_fact.factors = LinearAlgebra.LU(LAPACK.getrf!(ws_and_fact.workspace, + A)...) + cache.cacheval = ws_and_fact + cache.isfresh = false + end + y = ldiv!(cache.u, cache.cacheval.factors, cache.b) + SciMLBase.build_linear_solution(alg, y, nothing, cache) +end + +""" +`FastQRFactorization()` + +The FastLapackInterface.jl version of the QR factorization. +""" +struct FastQRFactorization{P} <: AbstractFactorization + pivot::P + blocksize::Int +end + +# is 36 or 16 better here? LinearAlgebra and FastLapackInterface use 36, +# but QRFactorization uses 16. 
+FastQRFactorization() = FastQRFactorization(NoPivot(), 36)
+
+function init_cacheval(alg::FastQRFactorization{NoPivot}, A::AbstractMatrix, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions)
+    ws = QRWYWs(A; blocksize = alg.blocksize)
+    return WorkspaceAndFactors(ws,
+        ArrayInterface.qr_instance(convert(AbstractMatrix, A)))
+end
+function init_cacheval(::FastQRFactorization{ColumnNorm}, A::AbstractMatrix, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions)
+    ws = QRpWs(A)
+    return WorkspaceAndFactors(ws,
+        ArrayInterface.qr_instance(convert(AbstractMatrix, A)))
+end
+
+function init_cacheval(alg::FastQRFactorization, A, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions)
+    return init_cacheval(alg, convert(AbstractMatrix, A), b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::OperatorAssumptions)
+end
+
+function SciMLBase.solve!(cache::LinearCache, alg::FastQRFactorization{P};
+        kwargs...) where {P}
+    A = cache.A
+    A = convert(AbstractMatrix, A)
+    ws_and_fact = @get_cacheval(cache, :FastQRFactorization)
+    if cache.isfresh
+        # we will fail here if A is a different *size* than in a previous version of the same cache.
+        # it may instead be desirable to resize the workspace.
+        if P === NoPivot
+            @set! ws_and_fact.factors = LinearAlgebra.QRCompactWY(LAPACK.geqrt!(
+                ws_and_fact.workspace,
+                A)...)
+        else
+            @set! ws_and_fact.factors = LinearAlgebra.QRPivoted(LAPACK.geqp3!(
+                ws_and_fact.workspace,
+                A)...)
+        end
+        cache.cacheval = ws_and_fact
+        cache.isfresh = false
+    end
+    y = ldiv!(cache.u, cache.cacheval.factors, cache.b)
+    SciMLBase.build_linear_solution(alg, y, nothing, cache)
+end
+
+## SparspakFactorization is here since it's MIT licensed, not GPL
+
+"""
+`SparspakFactorization(; reuse_symbolic = true)`
+
+This is a translation of the well-known sparse matrix software Sparspak
+(the Waterloo Sparse Matrix Package), which solves large sparse systems of
+linear algebraic equations. Sparspak is composed of the subroutines from the book
+"Computer Solution of Large Sparse Positive Definite Systems" by Alan George and
+Joseph Liu. Originally written in Fortran 77 and later rewritten in Fortran 90,
+the software has been translated into Julia here.
+
+The Julia rewrite is released under the MIT license with an express permission
+from the authors of the Fortran package. The package uses multiple dispatch to
+route around standard BLAS routines for element types such as arbitrary-precision
+floating point numbers or `ForwardDiff.Dual`, which, for example, allows for
+Automatic Differentiation (AD) of a sparse-matrix solve.
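+
+## Example
+
+A minimal usage sketch; the arbitrary-precision sparse system is purely illustrative:
+
+    using LinearSolve, SparseArrays, LinearAlgebra
+    A = sprand(BigFloat, 10, 10, 0.3) + I
+    prob = LinearProblem(A, rand(BigFloat, 10))
+    sol = solve(prob, SparspakFactorization())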
+""" +Base.@kwdef struct SparspakFactorization <: AbstractFactorization + reuse_symbolic::Bool = true +end + +const PREALLOCATED_SPARSEPAK = sparspaklu(SparseMatrixCSC(0, 0, [1], Int[], Float64[]), + factorize = false) + +function init_cacheval(alg::SparspakFactorization, + A::Union{Matrix, Nothing, AbstractSciMLOperator}, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, + verbose::Bool, assumptions::OperatorAssumptions) + nothing +end + +function init_cacheval(::SparspakFactorization, A::SparseMatrixCSC{Float64, Int}, b, u, Pl, + Pr, maxiters::Int, abstol, + reltol, + verbose::Bool, assumptions::OperatorAssumptions) + PREALLOCATED_SPARSEPAK +end + +function init_cacheval(::SparspakFactorization, A, b, u, Pl, Pr, maxiters::Int, abstol, + reltol, + verbose::Bool, assumptions::OperatorAssumptions) + A = convert(AbstractMatrix, A) + if A isa SparseArrays.AbstractSparseArray + return sparspaklu( + SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), + nonzeros(A)), + factorize = false) + else + return sparspaklu(SparseMatrixCSC(0, 0, [1], Int[], eltype(A)[]), + factorize = false) + end +end + +function init_cacheval(::SparspakFactorization, ::StaticArray, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) + nothing +end + +function SciMLBase.solve!(cache::LinearCache, alg::SparspakFactorization; kwargs...) + A = cache.A + if cache.isfresh + if cache.cacheval !== nothing && alg.reuse_symbolic + fact = sparspaklu!(@get_cacheval(cache, :SparspakFactorization), + SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), + nonzeros(A))) + else + fact = sparspaklu(SparseMatrixCSC(size(A)..., getcolptr(A), rowvals(A), + nonzeros(A))) + end + cache.cacheval = fact + cache.isfresh = false + end + y = ldiv!(cache.u, @get_cacheval(cache, :SparspakFactorization), cache.b) + SciMLBase.build_linear_solution(alg, y, nothing, cache) +end + +for alg in InteractiveUtils.subtypes(AbstractFactorization) + @eval function init_cacheval(alg::$alg, A::MatrixOperator, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + init_cacheval(alg, A.A, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + end +end From 1d861d8fbd75234db748025daa4127979cc27ec5 Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Fri, 19 Apr 2024 14:24:14 -0400 Subject: [PATCH 3/4] Fix CUDSS loading and warning --- Project.toml | 294 ++++++++++----------- ext/LinearSolveCUDAExt.jl | 87 +++++-- ext/LinearSolveCUDSSExt.jl | 8 + src/LinearSolve.jl | 512 +++++++++++++++++++------------------ src/factorization.jl | 1 + 5 files changed, 476 insertions(+), 426 deletions(-) create mode 100644 ext/LinearSolveCUDSSExt.jl diff --git a/Project.toml b/Project.toml index d4295eecd..baf6b5359 100644 --- a/Project.toml +++ b/Project.toml @@ -1,146 +1,148 @@ -name = "LinearSolve" -uuid = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" -authors = ["SciML"] -version = "2.28.0" - -[deps] -ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" -ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" -ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471" -DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56" -FastLapackInterface = "29a986be-02c6-4525-aec4-84b980013641" -GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" -InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" -KLU = "ef3ab10e-7fda-4108-b977-705223b18434" -Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" 
-LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02" -Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" -LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -MKL_jll = "856f044c-d86e-5d09-b602-aeab76dc8ba7" -Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" -PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" -Preferences = "21216c6a-2e73-6563-6e65-726566657250" -RecursiveFactorization = "f2c3362d-daeb-58d1-803e-2bc74f2840b4" -Reexport = "189a3867-3050-52da-a836-e630ba90ab69" -SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" -SciMLOperators = "c0aeaf25-5076-4817-a8d5-81caf7dfa961" -Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46" -SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" -Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac" -StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" -UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" - -[weakdeps] -BandedMatrices = "aae01518-5342-5314-be14-df237901396f" -BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0" -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" -Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" -EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" -FastAlmostBandedMatrices = "9d29842c-ecb8-4973-b1e9-a27b1157504e" -HYPRE = "b5ffcf37-a2bd-41ab-a3da-4bd9bc8ad771" -IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" -KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" -KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77" -Metal = "dde4c033-4e86-420c-a63e-0dd931031962" -Pardiso = "46dd5b70-b6fb-5a00-ae2d-e8fea33afaf2" -RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd" - -[extensions] -LinearSolveBandedMatricesExt = "BandedMatrices" -LinearSolveBlockDiagonalsExt = "BlockDiagonals" -LinearSolveCUDAExt = "CUDA" -LinearSolveEnzymeExt = ["Enzyme", "EnzymeCore"] -LinearSolveFastAlmostBandedMatricesExt = ["FastAlmostBandedMatrices"] -LinearSolveHYPREExt = "HYPRE" -LinearSolveIterativeSolversExt = "IterativeSolvers" -LinearSolveKernelAbstractionsExt = "KernelAbstractions" -LinearSolveKrylovKitExt = "KrylovKit" -LinearSolveMetalExt = "Metal" -LinearSolvePardisoExt = "Pardiso" -LinearSolveRecursiveArrayToolsExt = "RecursiveArrayTools" - -[compat] -AllocCheck = "0.1" -Aqua = "0.8" -ArrayInterface = "7.7" -BandedMatrices = "1.5" -BlockDiagonals = "0.1.42" -CUDA = "5" -ChainRulesCore = "1.22" -ConcreteStructs = "0.2.3" -DocStringExtensions = "0.9.3" -EnumX = "1.0.4" -Enzyme = "0.11.15" -EnzymeCore = "0.6.5" -FastAlmostBandedMatrices = "0.1" -FastLapackInterface = "2" -FiniteDiff = "2.22" -ForwardDiff = "0.10.36" -GPUArraysCore = "0.1.6" -HYPRE = "1.4.0" -InteractiveUtils = "1.10" -IterativeSolvers = "0.9.3" -JET = "0.8.28" -KLU = "0.6" -KernelAbstractions = "0.9.16" -Krylov = "0.9" -KrylovKit = "0.6" -LazyArrays = "1.8" -Libdl = "1.10" -LinearAlgebra = "1.10" -MPI = "0.20" -Markdown = "1.10" -Metal = "1" -MultiFloats = "1" -Pardiso = "0.5" -Pkg = "1" -PrecompileTools = "1.2" -Preferences = "1.4" -Random = "1" -RecursiveArrayTools = "3.8" -RecursiveFactorization = "0.2.14" -Reexport = "1" -SafeTestsets = "0.1" -SciMLBase = "2.26.3" -SciMLOperators = "0.3.7" -Setfield = "1" -SparseArrays = "1.10" -Sparspak = "0.3.6" -StableRNGs = "1" -StaticArrays = "1.5" -StaticArraysCore = "1.4.2" -Test = "1" -UnPack = "1" -Zygote = "0.6.69" -julia = "1.10" - -[extras] -AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a" -Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" -BandedMatrices = "aae01518-5342-5314-be14-df237901396f" -BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0" -Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" 
-FastAlmostBandedMatrices = "9d29842c-ecb8-4973-b1e9-a27b1157504e" -FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" -ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" -HYPRE = "b5ffcf37-a2bd-41ab-a3da-4bd9bc8ad771" -InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" -IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" -JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b" -KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" -KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77" -MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" -Metal = "dde4c033-4e86-420c-a63e-0dd931031962" -MultiFloats = "bdf0d083-296b-4888-a5b6-7498122e68a5" -Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd" -SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" -StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" -StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" - -[targets] -test = ["Aqua", "Test", "IterativeSolvers", "InteractiveUtils", "JET", "KrylovKit", "Pkg", "Random", "SafeTestsets", "MultiFloats", "ForwardDiff", "HYPRE", "MPI", "BlockDiagonals", "Enzyme", "FiniteDiff", "BandedMatrices", "FastAlmostBandedMatrices", "StaticArrays", "AllocCheck", "StableRNGs", "Zygote"] +name = "LinearSolve" +uuid = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" +authors = ["SciML"] +version = "2.28.0" + +[deps] +ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" +ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471" +DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56" +FastLapackInterface = "29a986be-02c6-4525-aec4-84b980013641" +GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" +InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +KLU = "ef3ab10e-7fda-4108-b977-705223b18434" +Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" +LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02" +Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MKL_jll = "856f044c-d86e-5d09-b602-aeab76dc8ba7" +Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" +PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +Preferences = "21216c6a-2e73-6563-6e65-726566657250" +RecursiveFactorization = "f2c3362d-daeb-58d1-803e-2bc74f2840b4" +Reexport = "189a3867-3050-52da-a836-e630ba90ab69" +SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" +SciMLOperators = "c0aeaf25-5076-4817-a8d5-81caf7dfa961" +Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac" +StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" +UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" + +[weakdeps] +BandedMatrices = "aae01518-5342-5314-be14-df237901396f" +BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e" +Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" +EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" +FastAlmostBandedMatrices = "9d29842c-ecb8-4973-b1e9-a27b1157504e" +HYPRE = "b5ffcf37-a2bd-41ab-a3da-4bd9bc8ad771" +IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" +KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" +KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77" +Metal = 
"dde4c033-4e86-420c-a63e-0dd931031962" +Pardiso = "46dd5b70-b6fb-5a00-ae2d-e8fea33afaf2" +RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd" + +[extensions] +LinearSolveBandedMatricesExt = "BandedMatrices" +LinearSolveBlockDiagonalsExt = "BlockDiagonals" +LinearSolveCUDAExt = "CUDA" +LinearSolveCUDSSExt = "CUDSS" +LinearSolveEnzymeExt = ["Enzyme", "EnzymeCore"] +LinearSolveFastAlmostBandedMatricesExt = ["FastAlmostBandedMatrices"] +LinearSolveHYPREExt = "HYPRE" +LinearSolveIterativeSolversExt = "IterativeSolvers" +LinearSolveKernelAbstractionsExt = "KernelAbstractions" +LinearSolveKrylovKitExt = "KrylovKit" +LinearSolveMetalExt = "Metal" +LinearSolvePardisoExt = "Pardiso" +LinearSolveRecursiveArrayToolsExt = "RecursiveArrayTools" + +[compat] +AllocCheck = "0.1" +Aqua = "0.8" +ArrayInterface = "7.7" +BandedMatrices = "1.5" +BlockDiagonals = "0.1.42" +CUDA = "5" +ChainRulesCore = "1.22" +ConcreteStructs = "0.2.3" +DocStringExtensions = "0.9.3" +EnumX = "1.0.4" +Enzyme = "0.11.15" +EnzymeCore = "0.6.5" +FastAlmostBandedMatrices = "0.1" +FastLapackInterface = "2" +FiniteDiff = "2.22" +ForwardDiff = "0.10.36" +GPUArraysCore = "0.1.6" +HYPRE = "1.4.0" +InteractiveUtils = "1.10" +IterativeSolvers = "0.9.3" +JET = "0.8.28" +KLU = "0.6" +KernelAbstractions = "0.9.16" +Krylov = "0.9" +KrylovKit = "0.6" +LazyArrays = "1.8" +Libdl = "1.10" +LinearAlgebra = "1.10" +MPI = "0.20" +Markdown = "1.10" +Metal = "1" +MultiFloats = "1" +Pardiso = "0.5" +Pkg = "1" +PrecompileTools = "1.2" +Preferences = "1.4" +Random = "1" +RecursiveArrayTools = "3.8" +RecursiveFactorization = "0.2.14" +Reexport = "1" +SafeTestsets = "0.1" +SciMLBase = "2.26.3" +SciMLOperators = "0.3.7" +Setfield = "1" +SparseArrays = "1.10" +Sparspak = "0.3.6" +StableRNGs = "1" +StaticArrays = "1.5" +StaticArraysCore = "1.4.2" +Test = "1" +UnPack = "1" +Zygote = "0.6.69" +julia = "1.10" + +[extras] +AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a" +Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +BandedMatrices = "aae01518-5342-5314-be14-df237901396f" +BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0" +Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" +FastAlmostBandedMatrices = "9d29842c-ecb8-4973-b1e9-a27b1157504e" +FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +HYPRE = "b5ffcf37-a2bd-41ab-a3da-4bd9bc8ad771" +InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" +JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b" +KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" +KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77" +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" +Metal = "dde4c033-4e86-420c-a63e-0dd931031962" +MultiFloats = "bdf0d083-296b-4888-a5b6-7498122e68a5" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd" +SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" +StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" +StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" + +[targets] +test = ["Aqua", "Test", "IterativeSolvers", "InteractiveUtils", "JET", "KrylovKit", "Pkg", "Random", "SafeTestsets", "MultiFloats", "ForwardDiff", "HYPRE", "MPI", "BlockDiagonals", "Enzyme", "FiniteDiff", "BandedMatrices", "FastAlmostBandedMatrices", "StaticArrays", "AllocCheck", "StableRNGs", "Zygote"] diff --git 
a/ext/LinearSolveCUDAExt.jl b/ext/LinearSolveCUDAExt.jl index 1cfb0888e..e7c1d7f8c 100644 --- a/ext/LinearSolveCUDAExt.jl +++ b/ext/LinearSolveCUDAExt.jl @@ -1,26 +1,61 @@ -module LinearSolveCUDAExt - -using CUDA -using LinearSolve -using LinearSolve.LinearAlgebra, LinearSolve.SciMLBase, LinearSolve.ArrayInterface -using SciMLBase: AbstractSciMLOperator - -function SciMLBase.solve!(cache::LinearSolve.LinearCache, alg::CudaOffloadFactorization; - kwargs...) - if cache.isfresh - fact = qr(CUDA.CuArray(cache.A)) - cache.cacheval = fact - cache.isfresh = false - end - y = Array(ldiv!(CUDA.CuArray(cache.u), cache.cacheval, CUDA.CuArray(cache.b))) - cache.u .= y - SciMLBase.build_linear_solution(alg, y, nothing, cache) -end - -function LinearSolve.init_cacheval(alg::CudaOffloadFactorization, A, b, u, Pl, Pr, - maxiters::Int, abstol, reltol, verbose::Bool, - assumptions::OperatorAssumptions) - qr(CUDA.CuArray(A)) -end - -end +module LinearSolveCUDAExt + +using CUDA +using LinearSolve +using LinearSolve.LinearAlgebra, LinearSolve.SciMLBase, LinearSolve.ArrayInterface +using SciMLBase: AbstractSciMLOperator + +function LinearSolve.defaultalg(A::CUDA.CUSPARSE.CuSparseMatrixCSR{Tv, Ti}, b, + assump::OperatorAssumptions{Bool}) where {Tv, Ti} + if LinearSolve.cudss_loaded(A) + LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.LUFactorization) + else + if !LinearSolve.ALREADY_WARNED_CUDSS[] + @warn("CUDSS.jl is required for LU Factorizations on CuSparseMatrixCSR. Please load this library. Falling back to Krylov") + LinearSolve.ALREADY_WARNED_CUDSS[] = true + end + LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES) + end +end + +function LinearSolve.error_no_cudss_lu(A::CUDA.CUSPARSE.CuSparseMatrixCSR) + if !LinearSolve.CUDSS_LOADED[] + error("CUDSS.jl is required for LU Factorizations on CuSparseMatrixCSR. Please load this library.") + end + nothing +end + +function SciMLBase.solve!(cache::LinearSolve.LinearCache, alg::CudaOffloadFactorization; + kwargs...) 
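+    # Offloading pattern: copy A to the GPU and QR-factorize it there; b and u are
+    # moved to the device for each solve and the solution is copied back to the host.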
+ if cache.isfresh + fact = qr(CUDA.CuArray(cache.A)) + cache.cacheval = fact + cache.isfresh = false + end + y = Array(ldiv!(CUDA.CuArray(cache.u), cache.cacheval, CUDA.CuArray(cache.b))) + cache.u .= y + SciMLBase.build_linear_solution(alg, y, nothing, cache) +end + +function LinearSolve.init_cacheval(alg::CudaOffloadFactorization, A, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + qr(CUDA.CuArray(A)) +end + +function LinearSolve.init_cacheval(::SparspakFactorization, A::CUDA.CUSPARSE.CuSparseMatrixCSR, b, u, + Pl, Pr, maxiters::Int, abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) + nothing +end + +function LinearSolve.init_cacheval(::KLUFactorization, A::CUDA.CUSPARSE.CuSparseMatrixCSR, b, u, + Pl, Pr, maxiters::Int, abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) + nothing +end + +function LinearSolve.init_cacheval(::UMFPACKFactorization, A::CUDA.CUSPARSE.CuSparseMatrixCSR, b, u, + Pl, Pr, maxiters::Int, abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) + nothing +end + +end diff --git a/ext/LinearSolveCUDSSExt.jl b/ext/LinearSolveCUDSSExt.jl new file mode 100644 index 000000000..6bf4da020 --- /dev/null +++ b/ext/LinearSolveCUDSSExt.jl @@ -0,0 +1,8 @@ +module LinearSolveCUDSSExt + +using LinearSolve +using CUDSS + +LinearSolve.cudss_loaded(A::CUDSS.CUDA.CUSPARSE.CuSparseMatrixCSR) = true + +end diff --git a/src/LinearSolve.jl b/src/LinearSolve.jl index bcbc1aa74..57e6d5bd0 100644 --- a/src/LinearSolve.jl +++ b/src/LinearSolve.jl @@ -1,254 +1,258 @@ -module LinearSolve -if isdefined(Base, :Experimental) && - isdefined(Base.Experimental, Symbol("@max_methods")) - @eval Base.Experimental.@max_methods 1 -end - -import PrecompileTools - -PrecompileTools.@recompile_invalidations begin - using ArrayInterface - using RecursiveFactorization - using Base: cache_dependencies, Bool - using LinearAlgebra - using SparseArrays - using SparseArrays: AbstractSparseMatrixCSC, nonzeros, rowvals, getcolptr - using LazyArrays: @~, BroadcastArray - using SciMLBase: AbstractLinearAlgorithm - using SciMLOperators - using SciMLOperators: AbstractSciMLOperator, IdentityOperator - using Setfield - using UnPack - using KLU - using Sparspak - using FastLapackInterface - using DocStringExtensions - using EnumX - using Markdown - using ChainRulesCore - import InteractiveUtils - - import StaticArraysCore: StaticArray, SVector, MVector, SMatrix, MMatrix - - using LinearAlgebra: BlasInt, LU - using LinearAlgebra.LAPACK: require_one_based_indexing, - chkfinite, chkstride1, - @blasfunc, chkargsok - - import GPUArraysCore - import Preferences - import ConcreteStructs: @concrete - - # wrap - import Krylov - using SciMLBase - import Preferences -end - -const CRC = ChainRulesCore - -if Preferences.@load_preference("LoadMKL_JLL", true) - using MKL_jll - const usemkl = MKL_jll.is_available() -else - const usemkl = false -end - -using Reexport -@reexport using SciMLBase -using SciMLBase: _unwrap_val - -abstract type SciMLLinearSolveAlgorithm <: SciMLBase.AbstractLinearAlgorithm end -abstract type AbstractFactorization <: SciMLLinearSolveAlgorithm end -abstract type AbstractKrylovSubspaceMethod <: SciMLLinearSolveAlgorithm end -abstract type AbstractSolveFunction <: SciMLLinearSolveAlgorithm end - -# Traits - -needs_concrete_A(alg::AbstractFactorization) = true -needs_concrete_A(alg::AbstractKrylovSubspaceMethod) = false -needs_concrete_A(alg::AbstractSolveFunction) = false - -# Util -is_underdetermined(x) = false 
-is_underdetermined(A::AbstractMatrix) = size(A, 1) < size(A, 2) -is_underdetermined(A::AbstractSciMLOperator) = size(A, 1) < size(A, 2) - -_isidentity_struct(A) = false -_isidentity_struct(λ::Number) = isone(λ) -_isidentity_struct(A::UniformScaling) = isone(A.λ) -_isidentity_struct(::SciMLOperators.IdentityOperator) = true - -# Dispatch Friendly way to check if an extension is loaded -__is_extension_loaded(::Val) = false - -# Check if a sparsity pattern has changed -pattern_changed(fact, A) = false - -function _fast_sym_givens! end - -# Code - -const INCLUDE_SPARSE = Preferences.@load_preference("include_sparse", Base.USE_GPL_LIBS) - -EnumX.@enumx DefaultAlgorithmChoice begin - LUFactorization - QRFactorization - DiagonalFactorization - DirectLdiv! - SparspakFactorization - KLUFactorization - UMFPACKFactorization - KrylovJL_GMRES - GenericLUFactorization - RFLUFactorization - LDLtFactorization - BunchKaufmanFactorization - CHOLMODFactorization - SVDFactorization - CholeskyFactorization - NormalCholeskyFactorization - AppleAccelerateLUFactorization - MKLLUFactorization - QRFactorizationPivoted - KrylovJL_CRAIGMR - KrylovJL_LSMR -end - -struct DefaultLinearSolver <: SciMLLinearSolveAlgorithm - alg::DefaultAlgorithmChoice.T -end - -const BLASELTYPES = Union{Float32, Float64, ComplexF32, ComplexF64} - -include("common.jl") -include("factorization.jl") -include("appleaccelerate.jl") -include("mkl.jl") -include("simplelu.jl") -include("simplegmres.jl") -include("iterative_wrappers.jl") -include("preconditioners.jl") -include("solve_function.jl") -include("default.jl") -include("init.jl") -include("extension_algs.jl") -include("adjoint.jl") -include("deprecated.jl") - -@generated function SciMLBase.solve!(cache::LinearCache, alg::AbstractFactorization; - kwargs...) - quote - if cache.isfresh - fact = do_factorization(alg, cache.A, cache.b, cache.u) - cache.cacheval = fact - cache.isfresh = false - end - y = _ldiv!(cache.u, @get_cacheval(cache, $(Meta.quot(defaultalg_symbol(alg)))), - cache.b) - - #= - retcode = if LinearAlgebra.issuccess(fact) - SciMLBase.ReturnCode.Success - else - SciMLBase.ReturnCode.Failure - end - SciMLBase.build_linear_solution(alg, y, nothing, cache; retcode = retcode) - =# - SciMLBase.build_linear_solution(alg, y, nothing, cache) - end -end - -@static if INCLUDE_SPARSE - include("factorization_sparse.jl") -end - -# Solver Specific Traits -## Needs Square Matrix -""" - needs_square_A(alg) - -Returns `true` if the algorithm requires a square matrix. -""" -needs_square_A(::Nothing) = false # Linear Solve automatically will use a correct alg! -needs_square_A(alg::SciMLLinearSolveAlgorithm) = true -for alg in (:QRFactorization, :FastQRFactorization, :NormalCholeskyFactorization, - :NormalBunchKaufmanFactorization) - @eval needs_square_A(::$(alg)) = false -end -for kralg in (Krylov.lsmr!, Krylov.craigmr!) 
- @eval needs_square_A(::KrylovJL{$(typeof(kralg))}) = false -end -for alg in (:LUFactorization, :FastLUFactorization, :SVDFactorization, - :GenericFactorization, :GenericLUFactorization, :SimpleLUFactorization, - :RFLUFactorization, :UMFPACKFactorization, :KLUFactorization, :SparspakFactorization, - :DiagonalFactorization, :CholeskyFactorization, :BunchKaufmanFactorization, - :CHOLMODFactorization, :LDLtFactorization, :AppleAccelerateLUFactorization, - :MKLLUFactorization, :MetalLUFactorization) - @eval needs_square_A(::$(alg)) = true -end - -const IS_OPENBLAS = Ref(true) -isopenblas() = IS_OPENBLAS[] - -const HAS_APPLE_ACCELERATE = Ref(false) -appleaccelerate_isavailable() = HAS_APPLE_ACCELERATE[] - -PrecompileTools.@compile_workload begin - A = rand(4, 4) - b = rand(4) - prob = LinearProblem(A, b) - sol = solve(prob) - sol = solve(prob, LUFactorization()) - sol = solve(prob, RFLUFactorization()) - sol = solve(prob, KrylovJL_GMRES()) -end - -@static if INCLUDE_SPARSE - PrecompileTools.@compile_workload begin - A = sprand(4, 4, 0.3) + I - b = rand(4) - prob = LinearProblem(A, b) - sol = solve(prob, KLUFactorization()) - sol = solve(prob, UMFPACKFactorization()) - end -end - -PrecompileTools.@compile_workload begin - A = sprand(4, 4, 0.3) + I - b = rand(4) - prob = LinearProblem(A * A', b) - sol = solve(prob) # in case sparspak is used as default - sol = solve(prob, SparspakFactorization()) -end - -export LUFactorization, SVDFactorization, QRFactorization, GenericFactorization, - GenericLUFactorization, SimpleLUFactorization, RFLUFactorization, - NormalCholeskyFactorization, NormalBunchKaufmanFactorization, - UMFPACKFactorization, KLUFactorization, FastLUFactorization, FastQRFactorization, - SparspakFactorization, DiagonalFactorization, CholeskyFactorization, - BunchKaufmanFactorization, CHOLMODFactorization, LDLtFactorization - -export LinearSolveFunction, DirectLdiv! 
- -export KrylovJL, KrylovJL_CG, KrylovJL_MINRES, KrylovJL_GMRES, - KrylovJL_BICGSTAB, KrylovJL_LSMR, KrylovJL_CRAIGMR, - IterativeSolversJL, IterativeSolversJL_CG, IterativeSolversJL_GMRES, - IterativeSolversJL_BICGSTAB, IterativeSolversJL_MINRES, IterativeSolversJL_IDRS, - KrylovKitJL, KrylovKitJL_CG, KrylovKitJL_GMRES - -export SimpleGMRES - -export HYPREAlgorithm -export CudaOffloadFactorization -export MKLPardisoFactorize, MKLPardisoIterate -export PardisoJL -export MKLLUFactorization -export AppleAccelerateLUFactorization -export MetalLUFactorization - -export OperatorAssumptions, OperatorCondition - -export LinearSolveAdjoint - -end +module LinearSolve +if isdefined(Base, :Experimental) && + isdefined(Base.Experimental, Symbol("@max_methods")) + @eval Base.Experimental.@max_methods 1 +end + +import PrecompileTools + +PrecompileTools.@recompile_invalidations begin + using ArrayInterface + using RecursiveFactorization + using Base: cache_dependencies, Bool + using LinearAlgebra + using SparseArrays + using SparseArrays: AbstractSparseMatrixCSC, nonzeros, rowvals, getcolptr + using LazyArrays: @~, BroadcastArray + using SciMLBase: AbstractLinearAlgorithm + using SciMLOperators + using SciMLOperators: AbstractSciMLOperator, IdentityOperator + using Setfield + using UnPack + using KLU + using Sparspak + using FastLapackInterface + using DocStringExtensions + using EnumX + using Markdown + using ChainRulesCore + import InteractiveUtils + + import StaticArraysCore: StaticArray, SVector, MVector, SMatrix, MMatrix + + using LinearAlgebra: BlasInt, LU + using LinearAlgebra.LAPACK: require_one_based_indexing, + chkfinite, chkstride1, + @blasfunc, chkargsok + + import GPUArraysCore + import Preferences + import ConcreteStructs: @concrete + + # wrap + import Krylov + using SciMLBase + import Preferences +end + +const CRC = ChainRulesCore + +if Preferences.@load_preference("LoadMKL_JLL", true) + using MKL_jll + const usemkl = MKL_jll.is_available() +else + const usemkl = false +end + +using Reexport +@reexport using SciMLBase +using SciMLBase: _unwrap_val + +abstract type SciMLLinearSolveAlgorithm <: SciMLBase.AbstractLinearAlgorithm end +abstract type AbstractFactorization <: SciMLLinearSolveAlgorithm end +abstract type AbstractKrylovSubspaceMethod <: SciMLLinearSolveAlgorithm end +abstract type AbstractSolveFunction <: SciMLLinearSolveAlgorithm end + +# Traits + +needs_concrete_A(alg::AbstractFactorization) = true +needs_concrete_A(alg::AbstractKrylovSubspaceMethod) = false +needs_concrete_A(alg::AbstractSolveFunction) = false + +# Util +is_underdetermined(x) = false +is_underdetermined(A::AbstractMatrix) = size(A, 1) < size(A, 2) +is_underdetermined(A::AbstractSciMLOperator) = size(A, 1) < size(A, 2) + +_isidentity_struct(A) = false +_isidentity_struct(λ::Number) = isone(λ) +_isidentity_struct(A::UniformScaling) = isone(A.λ) +_isidentity_struct(::SciMLOperators.IdentityOperator) = true + +# Dispatch Friendly way to check if an extension is loaded +__is_extension_loaded(::Val) = false + +# Check if a sparsity pattern has changed +pattern_changed(fact, A) = false + +function _fast_sym_givens! end + +# Code + +const INCLUDE_SPARSE = Preferences.@load_preference("include_sparse", Base.USE_GPL_LIBS) + +EnumX.@enumx DefaultAlgorithmChoice begin + LUFactorization + QRFactorization + DiagonalFactorization + DirectLdiv! 
+ SparspakFactorization + KLUFactorization + UMFPACKFactorization + KrylovJL_GMRES + GenericLUFactorization + RFLUFactorization + LDLtFactorization + BunchKaufmanFactorization + CHOLMODFactorization + SVDFactorization + CholeskyFactorization + NormalCholeskyFactorization + AppleAccelerateLUFactorization + MKLLUFactorization + QRFactorizationPivoted + KrylovJL_CRAIGMR + KrylovJL_LSMR +end + +struct DefaultLinearSolver <: SciMLLinearSolveAlgorithm + alg::DefaultAlgorithmChoice.T +end + +const BLASELTYPES = Union{Float32, Float64, ComplexF32, ComplexF64} + +include("common.jl") +include("factorization.jl") +include("appleaccelerate.jl") +include("mkl.jl") +include("simplelu.jl") +include("simplegmres.jl") +include("iterative_wrappers.jl") +include("preconditioners.jl") +include("solve_function.jl") +include("default.jl") +include("init.jl") +include("extension_algs.jl") +include("adjoint.jl") +include("deprecated.jl") + +@generated function SciMLBase.solve!(cache::LinearCache, alg::AbstractFactorization; + kwargs...) + quote + if cache.isfresh + fact = do_factorization(alg, cache.A, cache.b, cache.u) + cache.cacheval = fact + cache.isfresh = false + end + y = _ldiv!(cache.u, @get_cacheval(cache, $(Meta.quot(defaultalg_symbol(alg)))), + cache.b) + + #= + retcode = if LinearAlgebra.issuccess(fact) + SciMLBase.ReturnCode.Success + else + SciMLBase.ReturnCode.Failure + end + SciMLBase.build_linear_solution(alg, y, nothing, cache; retcode = retcode) + =# + SciMLBase.build_linear_solution(alg, y, nothing, cache) + end +end + +@static if INCLUDE_SPARSE + include("factorization_sparse.jl") +end + +# Solver Specific Traits +## Needs Square Matrix +""" + needs_square_A(alg) + +Returns `true` if the algorithm requires a square matrix. +""" +needs_square_A(::Nothing) = false # Linear Solve automatically will use a correct alg! +needs_square_A(alg::SciMLLinearSolveAlgorithm) = true +for alg in (:QRFactorization, :FastQRFactorization, :NormalCholeskyFactorization, + :NormalBunchKaufmanFactorization) + @eval needs_square_A(::$(alg)) = false +end +for kralg in (Krylov.lsmr!, Krylov.craigmr!) 
+    @eval needs_square_A(::KrylovJL{$(typeof(kralg))}) = false
+end
+for alg in (:LUFactorization, :FastLUFactorization, :SVDFactorization,
+    :GenericFactorization, :GenericLUFactorization, :SimpleLUFactorization,
+    :RFLUFactorization, :UMFPACKFactorization, :KLUFactorization, :SparspakFactorization,
+    :DiagonalFactorization, :CholeskyFactorization, :BunchKaufmanFactorization,
+    :CHOLMODFactorization, :LDLtFactorization, :AppleAccelerateLUFactorization,
+    :MKLLUFactorization, :MetalLUFactorization)
+    @eval needs_square_A(::$(alg)) = true
+end
+
+const IS_OPENBLAS = Ref(true)
+isopenblas() = IS_OPENBLAS[]
+
+const HAS_APPLE_ACCELERATE = Ref(false)
+appleaccelerate_isavailable() = HAS_APPLE_ACCELERATE[]
+
+PrecompileTools.@compile_workload begin
+    A = rand(4, 4)
+    b = rand(4)
+    prob = LinearProblem(A, b)
+    sol = solve(prob)
+    sol = solve(prob, LUFactorization())
+    sol = solve(prob, RFLUFactorization())
+    sol = solve(prob, KrylovJL_GMRES())
+end
+
+@static if INCLUDE_SPARSE
+    PrecompileTools.@compile_workload begin
+        A = sprand(4, 4, 0.3) + I
+        b = rand(4)
+        prob = LinearProblem(A, b)
+        sol = solve(prob, KLUFactorization())
+        sol = solve(prob, UMFPACKFactorization())
+    end
+end
+
+PrecompileTools.@compile_workload begin
+    A = sprand(4, 4, 0.3) + I
+    b = rand(4)
+    prob = LinearProblem(A * A', b)
+    sol = solve(prob) # in case sparspak is used as default
+    sol = solve(prob, SparspakFactorization())
+end
+
+const ALREADY_WARNED_CUDSS = Ref{Bool}(false) # guard so extensions can warn about missing CUDSS only once
+error_no_cudss_lu(A) = nothing # no-op fallback; extensions may overload to raise an informative error
+cudss_loaded(A) = false # LinearSolveCUDSSExt overloads this to `true` for CuSparseMatrixCSR
+
+export LUFactorization, SVDFactorization, QRFactorization, GenericFactorization,
+    GenericLUFactorization, SimpleLUFactorization, RFLUFactorization,
+    NormalCholeskyFactorization, NormalBunchKaufmanFactorization,
+    UMFPACKFactorization, KLUFactorization, FastLUFactorization, FastQRFactorization,
+    SparspakFactorization, DiagonalFactorization, CholeskyFactorization,
+    BunchKaufmanFactorization, CHOLMODFactorization, LDLtFactorization
+
+export LinearSolveFunction, DirectLdiv!
+ +export KrylovJL, KrylovJL_CG, KrylovJL_MINRES, KrylovJL_GMRES, + KrylovJL_BICGSTAB, KrylovJL_LSMR, KrylovJL_CRAIGMR, + IterativeSolversJL, IterativeSolversJL_CG, IterativeSolversJL_GMRES, + IterativeSolversJL_BICGSTAB, IterativeSolversJL_MINRES, IterativeSolversJL_IDRS, + KrylovKitJL, KrylovKitJL_CG, KrylovKitJL_GMRES + +export SimpleGMRES + +export HYPREAlgorithm +export CudaOffloadFactorization +export MKLPardisoFactorize, MKLPardisoIterate +export PardisoJL +export MKLLUFactorization +export AppleAccelerateLUFactorization +export MetalLUFactorization + +export OperatorAssumptions, OperatorCondition + +export LinearSolveAdjoint + +end diff --git a/src/factorization.jl b/src/factorization.jl index 31d52738f..5bccd4151 100644 --- a/src/factorization.jl +++ b/src/factorization.jl @@ -132,6 +132,7 @@ end function init_cacheval(alg::Union{LUFactorization, GenericLUFactorization}, A::Union{<:Adjoint, <:Transpose}, b, u, Pl, Pr, maxiters::Int, abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) + error_no_cudss_lu(A) if alg isa LUFactorization return lu(A; check = false) else From 084d0d6a33a519a139840e0442bc4fc6f699283a Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Fri, 19 Apr 2024 14:33:41 -0400 Subject: [PATCH 4/4] bound CUDSS --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index baf6b5359..bbf6af778 100644 --- a/Project.toml +++ b/Project.toml @@ -69,6 +69,7 @@ ArrayInterface = "7.7" BandedMatrices = "1.5" BlockDiagonals = "0.1.42" CUDA = "5" +CUDSS = "0.1" ChainRulesCore = "1.22" ConcreteStructs = "0.2.3" DocStringExtensions = "0.9.3"
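
---

Usage note (a reviewer sketch, not part of the patches above): with the
`cudss_loaded`/`error_no_cudss_lu` hooks and the symbolic-reuse path in
`LUFactorization`, solving with a `CUDA.CUSPARSE.CuSparseMatrixCSR` is expected
to route to CUDSS once `using CUDSS` activates LinearSolveCUDSSExt, and
re-solves that change only the nonzero values reuse the cached symbolic
factorization. Because the generic fallback is `pattern_changed(fact, A) = false`,
the pattern check is a no-op for matrix types without a specialized method, so
the sparsity pattern must genuinely stay fixed between solves. The sketch below
assumes CUDSS.jl provides `lu`/`lu!` methods for `CuSparseMatrixCSR` (the
premise of this series); the sizes, density, and diagonal shift are
illustrative only:

    using LinearAlgebra, SparseArrays
    using LinearSolve
    using CUDA, CUDSS # loading CUDSS flips cudss_loaded(A) to true

    A_cpu = sprand(1000, 1000, 0.01) + 10I # diagonally shifted so LU is well behaved
    A = CUDA.CUSPARSE.CuSparseMatrixCSR(A_cpu)
    b = CuArray(rand(1000))

    prob = LinearProblem(A, b)
    cache = init(prob, LUFactorization())
    sol = solve!(cache) # first solve: symbolic + numeric factorization

    # Update the values only; the sparsity pattern is unchanged
    A_cpu.nzval .*= 2.0
    cache.A = CUDA.CUSPARSE.CuSparseMatrixCSR(A_cpu) # marks the cache fresh
    sol = solve!(cache) # takes the lu!(cacheval, A) branch, reusing the symbolic analysis

Relatedly, the `init_cacheval` methods that return `nothing` for KLU, UMFPACK,
and Sparspak on `CuSparseMatrixCSR` appear to exist so that building a cache
for a GPU matrix never eagerly runs a CPU sparse factorization; the concrete
factorization is only created on the first `solve!`.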