From 031639f1a663bdca9b417dc376bea80f09277656 Mon Sep 17 00:00:00 2001
From: Avik Pal
Date: Thu, 30 Nov 2023 20:11:28 -0500
Subject: [PATCH] Fix DFSane

---
 src/NonlinearSolve.jl |   2 +-
 src/dfsane.jl         | 229 ++++++++++++------------------------
 src/utils.jl          |  15 ---
 3 files changed, 63 insertions(+), 183 deletions(-)

diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index 63d462f18..dacc98910 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -172,7 +172,7 @@ include("raphson.jl")
 # include("trustRegion.jl")
 # include("levenberg.jl")
 include("gaussnewton.jl")
-# include("dfsane.jl")
+include("dfsane.jl")
 include("pseudotransient.jl")
 include("broyden.jl")
 include("klement.jl")
diff --git a/src/dfsane.jl b/src/dfsane.jl
index 4b31ff9f3..8dcb1e9ff 100644
--- a/src/dfsane.jl
+++ b/src/dfsane.jl
@@ -1,8 +1,8 @@
 """
-    DFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0,
-        M::Int = 10, γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
-        n_exp::Int = 2, η_strategy::Function = (fn_1, n, x_n, f_n) -> fn_1 / n^2,
-        max_inner_iterations::Int = 1000)
+    DFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0, M::Int = 10,
+        γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5, n_exp::Int = 2,
+        η_strategy::Function = (fn_1, n, x_n, f_n) -> fn_1 / n^2,
+        max_inner_iterations::Int = 100)

 A low-overhead and allocation-free implementation of the df-sane method for solving
 large-scale nonlinear systems of equations. For in depth information about all the
 parameters and the algorithm,
@@ -39,34 +39,27 @@ Computation, 75, 1429-1448.](https://www.researchgate.net/publication/220576479_
   `f_n` the current residual. Should satisfy ``η > 0`` and ``∑ₖ ηₖ < ∞``. Defaults to
   ``fn_1 / n^2``.
 - `max_inner_iterations`: the maximum number of iterations allowed for the inner loop of the
-  algorithm. Defaults to `1000`.
+  algorithm. Defaults to `100`.
 """
-@concrete struct DFSane <: AbstractNonlinearSolveAlgorithm
-    σ_min
-    σ_max
-    σ_1
-    M::Int
-    γ
-    τ_min
-    τ_max
-    n_exp::Int
-    η_strategy
-    max_inner_iterations::Int
-end
-
-function DFSane(; σ_min = 1e-10, σ_max = 1e+10, σ_1 = 1.0, M = 10, γ = 1e-4, τ_min = 0.1,
-    τ_max = 0.5, n_exp = 2, η_strategy::F = (fn_1, n, x_n, f_n) -> fn_1 / n^2,
-    max_inner_iterations = 1000) where {F}
-    return DFSane(σ_min, σ_max, σ_1, M, γ, τ_min, τ_max, n_exp, η_strategy,
-        max_inner_iterations)
+@kwdef @concrete struct DFSane <: AbstractNonlinearSolveAlgorithm
+    σ_min = 1e-10
+    σ_max = 1e10
+    σ_1 = 1.0
+    M::Int = 10
+    γ = 1e-4
+    τ_min = 0.1
+    τ_max = 0.5
+    n_exp::Int = 2
+    η_strategy = (fn_1, n, x_n, f_n) -> fn_1 / n^2
+    max_inner_iterations::Int = 100
 end

 @concrete mutable struct DFSaneCache{iip} <: AbstractNonlinearSolveCache{iip}
     alg
     u
-    uprev
+    u_cache
     fu
-    fuprev
+    fu_cache
     du
     history
     f_norm
@@ -93,36 +86,35 @@ end
     trace
 end

-get_fu(cache::DFSaneCache) = cache.fu
-set_fu!(cache::DFSaneCache, fu) = (cache.fu = fu)
-
 function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::DFSane, args...;
         alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing,
         termination_condition = nothing, internalnorm::F = DEFAULT_NORM,
         kwargs...) where {uType, iip, F}
-    u = alias_u0 ? prob.u0 : deepcopy(prob.u0)
+    u = __maybe_unaliased(prob.u0, alias_u0)
     T = eltype(u)

-    du, uprev = copy(u), copy(u)
+    @bb du = similar(u)
+    @bb u_cache = copy(u)
+
     fu = evaluate_f(prob, u)
-    fuprev = copy(fu)
+    @bb fu_cache = copy(fu)

     f_norm = internalnorm(fu)^alg.n_exp
     f_norm_0 = f_norm

     history = fill(f_norm, alg.M)

-    abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fu, uprev,
+    abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fu, u_cache,
         termination_condition)
     trace = init_nonlinearsolve_trace(alg, u, fu, nothing, du; kwargs...)

-    return DFSaneCache{iip}(alg, u, uprev, fu, fuprev, du, history, f_norm, f_norm_0, alg.M,
-        T(alg.σ_1), T(alg.σ_min), T(alg.σ_max), one(T), T(alg.γ), T(alg.τ_min),
+    return DFSaneCache{iip}(alg, u, u_cache, fu, fu_cache, du, history, f_norm, f_norm_0,
+        alg.M, T(alg.σ_1), T(alg.σ_min), T(alg.σ_max), one(T), T(alg.γ), T(alg.τ_min),
         T(alg.τ_max), alg.n_exp, prob.p, false, maxiters, internalnorm,
         ReturnCode.Default, abstol, reltol, prob, NLStats(1, 0, 0, 0, 0), tc_cache, trace)
 end

-function perform_step!(cache::DFSaneCache{true})
+function perform_step!(cache::DFSaneCache{iip}) where {iip}
     @unpack alg, f_norm, σ_n, σ_min, σ_max, α_1, γ, τ_min, τ_max, n_exp, M, prob = cache
     T = eltype(cache.u)
     f_norm_old = f_norm
@@ -131,128 +123,64 @@ function perform_step!(cache::DFSaneCache{true})
     σ_n = sign(σ_n) * clamp(abs(σ_n), σ_min, σ_max)

     # Line search direction
-    @. cache.du = -σ_n * cache.fuprev
+    @bb @. cache.du = -σ_n * cache.fu

     η = alg.η_strategy(cache.f_norm_0, cache.stats.nsteps, cache.u, cache.fu)

     f_bar = maximum(cache.history)
     α₊ = α_1
     α₋ = α_1
-    _axpy!(α₊, cache.du, cache.u)
-
-    prob.f(cache.fu, cache.u, cache.p)
-    f_norm = cache.internalnorm(cache.fu)^n_exp
-
-    # TODO: Failure mode with inner line search failed?
-    for _ in 1:(cache.alg.max_inner_iterations)
-        c = f_bar + η - γ * α₊^2 * f_norm_old
-
-        f_norm ≤ c && break
-
-        α₊ = α₊ * clamp(α₊ * f_norm_old / (f_norm + (T(2) * α₊ - T(1)) * f_norm_old),
-            τ_min, τ_max)
-        @. cache.u = cache.uprev - α₋ * cache.du
-
-        prob.f(cache.fu, cache.u, cache.p)
-        f_norm = cache.internalnorm(cache.fu)^n_exp
-
-        f_norm ≤ c && break
-
-        α₋ = α₋ * clamp(α₋ * f_norm_old / (f_norm + (T(2) * α₋ - T(1)) * f_norm_old),
-            τ_min, τ_max)
-        @. cache.u = cache.uprev + α₊ * cache.du
-
-        prob.f(cache.fu, cache.u, cache.p)
-        f_norm = cache.internalnorm(cache.fu)^n_exp
-    end
-
-    update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), get_fu(cache), nothing,
-        cache.du, α₊)
-    check_and_update!(cache, cache.fu, cache.u, cache.uprev)
+    @bb axpy!(α₊, cache.du, cache.u)

-    # Update spectral parameter
-    @. cache.uprev = cache.u - cache.uprev
-    @. cache.fuprev = cache.fu - cache.fuprev
-
-    α₊ = sum(abs2, cache.uprev)
-    @. cache.uprev *= cache.fuprev
-    α₋ = sum(cache.uprev)
-    cache.σ_n = α₊ / α₋
-
-    # Spectral parameter bounds check
-    if !(σ_min ≤ abs(cache.σ_n) ≤ σ_max)
-        test_norm = sqrt(sum(abs2, cache.fuprev))
-        cache.σ_n = clamp(inv(test_norm), T(1), T(1e5))
-    end
-
-    # Take step
-    @. cache.uprev = cache.u
-    @. cache.fuprev = cache.fu
-    cache.f_norm = f_norm
-
-    # Update history
-    cache.history[cache.stats.nsteps % M + 1] = f_norm
-    cache.stats.nf += 1
-    return nothing
-end
-
-function perform_step!(cache::DFSaneCache{false})
-    @unpack alg, f_norm, σ_n, σ_min, σ_max, α_1, γ, τ_min, τ_max, n_exp, M, prob = cache
-    T = eltype(cache.u)
-    f_norm_old = f_norm
-
-    # Spectral parameter range check
-    σ_n = sign(σ_n) * clamp(abs(σ_n), σ_min, σ_max)
-
-    # Line search direction
-    cache.du = @. -σ_n * cache.fuprev
-
-    η = alg.η_strategy(cache.f_norm_0, cache.stats.nsteps, cache.u, cache.fu)
-
-    f_bar = maximum(cache.history)
-    α₊ = α_1
-    α₋ = α_1
-    cache.u = @. cache.uprev + α₊ * cache.du
-
-    cache.fu = prob.f(cache.u, cache.p)
+    evaluate_f(cache, cache.u, cache.p)
     f_norm = cache.internalnorm(cache.fu)^n_exp
+    α = α₊

-    # TODO: Failure mode with inner line search failed?
-    for _ in 1:(cache.alg.max_inner_iterations)
-        c = f_bar + η - γ * α₊^2 * f_norm_old
-
-        f_norm ≤ c && break
+    inner_converged = false
+    for k in 1:(cache.alg.max_inner_iterations)
+        if f_norm ≤ f_bar + η - γ * α₊^2 * f_norm_old
+            α = α₊
+            inner_converged = true
+            break
+        end

         α₊ = α₊ * clamp(α₊ * f_norm_old / (f_norm + (T(2) * α₊ - T(1)) * f_norm_old),
             τ_min, τ_max)
-        cache.u = @. cache.uprev - α₋ * cache.du
+        @bb axpy!(-α₋, cache.du, cache.u)

-        cache.fu = prob.f(cache.u, cache.p)
+        evaluate_f(cache, cache.u, cache.p)
         f_norm = cache.internalnorm(cache.fu)^n_exp

-        f_norm ≤ c && break
+        if f_norm ≤ f_bar + η - γ * α₋^2 * f_norm_old
+            α = α₋
+            inner_converged = true
+            break
+        end

         α₋ = α₋ * clamp(α₋ * f_norm_old / (f_norm + (T(2) * α₋ - T(1)) * f_norm_old),
             τ_min, τ_max)
-        cache.u = @. cache.uprev + α₊ * cache.du
+        @bb axpy!(α₊, cache.du, cache.u)

-        cache.fu = prob.f(cache.u, cache.p)
+        evaluate_f(cache, cache.u, cache.p)
         f_norm = cache.internalnorm(cache.fu)^n_exp
     end

-    update_trace!(cache.trace, cache.stats.nsteps + 1, get_u(cache), get_fu(cache), nothing,
-        cache.du, α₊)
+    if !inner_converged
+        cache.retcode = ReturnCode.ConvergenceFailure
+        cache.force_stop = true
+    end

-    check_and_update!(cache, cache.fu, cache.u, cache.uprev)
+    update_trace!(cache, α)
+    check_and_update!(cache, cache.fu, cache.u, cache.u_cache)

     # Update spectral parameter
-    cache.uprev = @. cache.u - cache.uprev
-    cache.fuprev = @. cache.fu - cache.fuprev
+    @bb @. cache.u_cache = cache.u - cache.u_cache
+    @bb @. cache.fu_cache = cache.fu - cache.fu_cache

-    α₊ = sum(abs2, cache.uprev)
-    cache.uprev = @. cache.uprev * cache.fuprev
-    α₋ = sum(cache.uprev)
+    α₊ = sum(abs2, cache.u_cache)
+    @bb @. cache.u_cache *= cache.fu_cache
+    α₋ = sum(cache.u_cache)
     cache.σ_n = α₊ / α₋

     # Spectral parameter bounds check
@@ -262,8 +190,8 @@ function perform_step!(cache::DFSaneCache{false})
     end

     # Take step
-    cache.uprev = cache.u
-    cache.fuprev = cache.fu
+    @bb copyto!(cache.u_cache, cache.u)
+    @bb copyto!(cache.fu_cache, cache.fu)
     cache.f_norm = f_norm

     # Update history
@@ -272,41 +200,8 @@ function perform_step!(cache::DFSaneCache{false})
     return nothing
 end

-function SciMLBase.reinit!(cache::DFSaneCache{iip}, u0 = cache.u; p = cache.p,
-        abstol = cache.abstol, reltol = cache.reltol, maxiters = cache.maxiters,
-        termination_condition = get_termination_mode(cache.tc_cache)) where {iip}
-    cache.p = p
-    if iip
-        recursivecopy!(cache.u, u0)
-        recursivecopy!(cache.uprev, u0)
-        cache.prob.f(cache.fu, cache.u, p)
-        cache.prob.f(cache.fuprev, cache.uprev, p)
-    else
-        cache.u = u0
-        cache.uprev = u0
-        cache.fu = cache.prob.f(cache.u, p)
-        cache.fuprev = cache.prob.f(cache.uprev, p)
-    end
-
+function __reinit_internal!(cache::DFSaneCache; kwargs...)
     cache.f_norm = cache.internalnorm(cache.fu)^cache.n_exp
     cache.f_norm_0 = cache.f_norm
-
-    fill!(cache.history, cache.f_norm)
-
-    T = eltype(cache.u)
-    cache.σ_n = T(cache.alg.σ_1)
-
-    reset!(cache.trace)
-    abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, cache.fu, cache.u,
-        termination_condition)
-
-    cache.abstol = abstol
-    cache.reltol = reltol
-    cache.tc_cache = tc_cache
-    cache.maxiters = maxiters
-    cache.stats.nf = 1
-    cache.stats.nsteps = 1
-    cache.force_stop = false
-    cache.retcode = ReturnCode.Default
-    return cache
+    return
 end
diff --git a/src/utils.jl b/src/utils.jl
index 00b7d3726..90b882af3 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -206,14 +206,6 @@ function evaluate_f(cache, u, p)
     return nothing
 end

-"""
-    __matmul!(C, A, B)
-
-Defaults to `mul!(C, A, B)`. However, for sparse matrices uses `C .= A * B`.
-"""
-__matmul!(C, A, B) = mul!(C, A, B)
-__matmul!(C::AbstractSparseMatrix, A, B) = C .= A * B
-
 # Concretize Algorithms
 function get_concrete_algorithm(alg, prob)
     !hasfield(typeof(alg), :ad) && return alg
@@ -381,15 +373,8 @@ function __try_factorize_and_check_singular!(linsolve, X)
 end
 __try_factorize_and_check_singular!(::FakeLinearSolveJLCache, x) = _issingular(x), false

-# TODO: Remove. handled in MaybeInplace.jl
-@generated function _axpy!(α, x, y)
-    hasmethod(axpy!, Tuple{α, x, y}) && return :(axpy!(α, x, y))
-    return :(@. y += α * x)
-end
-
 # Non-square matrix
 @inline __needs_square_A(_, ::Number) = true
-# @inline __needs_square_A(_, ::StaticArray) = true
 @inline __needs_square_A(alg, _) = LinearSolve.needs_square_A(alg.linsolve)

 # Define special concatenation for certain Array combinations
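
Usage sketch, not taken from the patch: with `dfsane.jl` included again, the solver is driven through the standard NonlinearSolve.jl interface. The residual function, initial guess, and tolerance below are illustrative assumptions rather than values from the diff.

```julia
using NonlinearSolve

# Out-of-place residual F(u, p) = u.^2 .- p; the root is sqrt(p) in each component.
f(u, p) = u .^ 2 .- p
prob = NonlinearProblem(f, [1.0, 1.0, 1.0], 2.0)

# After this patch DFSane is a @kwdef struct, so the hyperparameters are plain keyword
# arguments of the algorithm type and max_inner_iterations defaults to 100.
sol = solve(prob, DFSane(; M = 10, σ_1 = 1.0); abstol = 1e-10)
@show sol.retcode sol.u
```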
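A second sketch shows cache reuse, assuming the generic `init`/`solve!`/`reinit!` front-end dispatches into the new `__reinit_internal!` hook for `DFSaneCache` (the hook itself is internal and never called directly); the parameter values are again illustrative.

```julia
using NonlinearSolve

f(u, p) = u .* u .- p
prob = NonlinearProblem(f, [1.0, 1.0], 2.0)

# Build the DFSaneCache once and iterate it to convergence.
cache = init(prob, DFSane(); abstol = 1e-9)
sol1 = solve!(cache)

# Re-seed the same cache for a new parameter value; the DFSane-specific hook only
# refreshes f_norm / f_norm_0, while the shared reinit! path resets u, fu, and stats.
reinit!(cache; p = 4.0)
sol2 = solve!(cache)
```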