From 910a30a7d1c76c16e6d800aa4a8f6f65f45b11d9 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Mon, 23 Oct 2023 01:10:03 -0400 Subject: [PATCH] Add Limited Memory Broyden --- src/NonlinearSolve.jl | 3 +- src/broyden.jl | 37 ++++-- src/lbroyden.jl | 260 +++++++++++++++++++++++++++++++++++++++ src/utils.jl | 12 ++ test/23_test_problems.jl | 2 + test/basictests.jl | 90 ++++++++++++++ test/matrix_resizing.jl | 3 +- 7 files changed, 396 insertions(+), 11 deletions(-) create mode 100644 src/lbroyden.jl diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 8d3a8ca2b..2b26b3721 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -76,6 +76,7 @@ include("dfsane.jl") include("pseudotransient.jl") include("broyden.jl") include("klement.jl") +include("lbroyden.jl") include("jacobian.jl") include("ad.jl") include("default.jl") @@ -106,7 +107,7 @@ end export RadiusUpdateSchemes export NewtonRaphson, TrustRegion, LevenbergMarquardt, DFSane, GaussNewton, PseudoTransient, - GeneralBroyden, GeneralKlement + GeneralBroyden, GeneralKlement, LimitedMemoryBroyden export LeastSquaresOptimJL, FastLevenbergMarquardtJL export RobustMultiNewton, FastShortcutNonlinearPolyalg diff --git a/src/broyden.jl b/src/broyden.jl index 6a767a8af..3232a2d9f 100644 --- a/src/broyden.jl +++ b/src/broyden.jl @@ -1,13 +1,14 @@ # Sadly `Broyden` is taken up by SimpleNonlinearSolve.jl """ - GeneralBroyden(max_resets, linesearch) - GeneralBroyden(; max_resets = 3, linesearch = LineSearch()) + GeneralBroyden(; max_resets = 3, linesearch = LineSearch(), reset_tolerance = nothing) An implementation of `Broyden` with reseting and line search. ## Arguments - `max_resets`: the maximum number of resets to perform. Defaults to `3`. + - `reset_tolerance`: the tolerance for the reset check. Defaults to + `sqrt(eps(eltype(u)))`. - `linesearch`: the line search algorithm to use. Defaults to [`LineSearch()`](@ref), which means that no line search is performed. Algorithms from `LineSearches.jl` can be used here directly, and they will be converted to the correct `LineSearch`. It is @@ -16,12 +17,14 @@ An implementation of `Broyden` with reseting and line search. """ @concrete struct GeneralBroyden <: AbstractNewtonAlgorithm{false, Nothing} max_resets::Int + reset_tolerance linesearch end -function GeneralBroyden(; max_resets = 3, linesearch = LineSearch()) +function GeneralBroyden(; max_resets = 3, linesearch = LineSearch(), + reset_tolerance = nothing) linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch) - return GeneralBroyden(max_resets, linesearch) + return GeneralBroyden(max_resets, reset_tolerance, linesearch) end @concrete mutable struct GeneralBroydenCache{iip} <: AbstractNonlinearSolveCache{iip} @@ -43,6 +46,8 @@ end internalnorm retcode::ReturnCode.T abstol + reset_tolerance + reset_check prob stats::NLStats lscache @@ -57,9 +62,13 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyde u = alias_u0 ? u0 : deepcopy(u0) fu = evaluate_f(prob, u) J⁻¹ = __init_identity_jacobian(u, fu) + reset_tolerance = alg.reset_tolerance === nothing ? sqrt(eps(eltype(u))) : + alg.reset_tolerance + reset_check = x -> abs(x) ≤ reset_tolerance return GeneralBroydenCache{iip}(f, alg, u, _mutable_zero(u), fu, zero(fu), zero(fu), p, J⁻¹, zero(_vec(fu)'), _mutable_zero(u), false, 0, alg.max_resets, - maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0), + maxiters, internalnorm, ReturnCode.Default, abstol, reset_tolerance, + reset_check, prob, NLStats(1, 0, 0, 0, 0), init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip))) end @@ -79,8 +88,13 @@ function perform_step!(cache::GeneralBroydenCache{true}) # Update the inverse jacobian dfu .= fu2 .- fu - if cache.resets < cache.max_resets && - (all(x -> abs(x) ≤ 1e-12, du) || all(x -> abs(x) ≤ 1e-12, dfu)) + + if all(cache.reset_check, du) || all(cache.reset_check, dfu) + if cache.resets ≥ cache.max_resets + cache.retcode = ReturnCode.Unstable + cache.force_stop = true + return nothing + end fill!(J⁻¹, 0) J⁻¹[diagind(J⁻¹)] .= T(1) cache.resets += 1 @@ -111,8 +125,12 @@ function perform_step!(cache::GeneralBroydenCache{false}) # Update the inverse jacobian cache.dfu = cache.fu2 .- cache.fu - if cache.resets < cache.max_resets && - (all(x -> abs(x) ≤ 1e-12, cache.du) || all(x -> abs(x) ≤ 1e-12, cache.dfu)) + if all(cache.reset_check, cache.du) || all(cache.reset_check, cache.dfu) + if cache.resets ≥ cache.max_resets + cache.retcode = ReturnCode.Unstable + cache.force_stop = true + return nothing + end cache.J⁻¹ = __init_identity_jacobian(cache.u, cache.fu) cache.resets += 1 else @@ -141,6 +159,7 @@ function SciMLBase.reinit!(cache::GeneralBroydenCache{iip}, u0 = cache.u; p = ca cache.maxiters = maxiters cache.stats.nf = 1 cache.stats.nsteps = 1 + cache.resets = 0 cache.force_stop = false cache.retcode = ReturnCode.Default return cache diff --git a/src/lbroyden.jl b/src/lbroyden.jl new file mode 100644 index 000000000..458db8104 --- /dev/null +++ b/src/lbroyden.jl @@ -0,0 +1,260 @@ +""" + LimitedMemoryBroyden(; max_resets::Int = 3, linesearch = LineSearch(), + threshold::Int = 10, reset_tolerance = nothing) + +An implementation of `LimitedMemoryBroyden` with reseting and line search. + +## Arguments + + - `max_resets`: the maximum number of resets to perform. Defaults to `3`. + - `reset_tolerance`: the tolerance for the reset check. Defaults to + `sqrt(eps(eltype(u)))`. + - `threshold`: the number of vectors to store in the low rank approximation. Defaults + to `10`. + - `linesearch`: the line search algorithm to use. Defaults to [`LineSearch()`](@ref), + which means that no line search is performed. Algorithms from `LineSearches.jl` can be + used here directly, and they will be converted to the correct `LineSearch`. It is + recommended to use [LiFukushimaLineSearchCache](@ref) -- a derivative free linesearch + specifically designed for Broyden's method. +""" +@concrete struct LimitedMemoryBroyden <: AbstractNewtonAlgorithm{false, Nothing} + max_resets::Int + threshold::Int + linesearch + reset_tolerance +end + +function LimitedMemoryBroyden(; max_resets::Int = 3, linesearch = LineSearch(), + threshold::Int = 10, reset_tolerance = nothing) + linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch) + return LimitedMemoryBroyden(max_resets, threshold, linesearch, reset_tolerance) +end + +@concrete mutable struct LimitedMemoryBroydenCache{iip} <: AbstractNonlinearSolveCache{iip} + f + alg + u + du + fu + fu2 + dfu + p + U + Vᵀ + Ux + xᵀVᵀ + u_cache + vᵀ_cache + force_stop::Bool + resets::Int + iterations_since_reset::Int + max_resets::Int + maxiters::Int + internalnorm + retcode::ReturnCode.T + abstol + reset_tolerance + reset_check + prob + stats::NLStats + lscache +end + +get_fu(cache::LimitedMemoryBroydenCache) = cache.fu + +function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LimitedMemoryBroyden, + args...; alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM, + kwargs...) where {uType, iip} + @unpack f, u0, p = prob + u = alias_u0 ? u0 : deepcopy(u0) + if u isa Number + # If u is a number then we simply use Broyden + return SciMLBase.__init(prob, + GeneralBroyden(; alg.max_resets, alg.reset_tolerance, + alg.linesearch), args...; alias_u0, maxiters, abstol, internalnorm, kwargs...) + end + fu = evaluate_f(prob, u) + threshold = min(alg.threshold, maxiters) + U, Vᵀ = __init_low_rank_jacobian(u, fu, threshold) + du = -fu + reset_tolerance = alg.reset_tolerance === nothing ? sqrt(eps(eltype(u))) : + alg.reset_tolerance + reset_check = x -> abs(x) ≤ reset_tolerance + return LimitedMemoryBroydenCache{iip}(f, alg, u, du, fu, zero(fu), + zero(fu), p, U, Vᵀ, similar(u, threshold), similar(u, 1, threshold), + zero(u), zero(u), false, 0, 0, alg.max_resets, maxiters, internalnorm, + ReturnCode.Default, abstol, reset_tolerance, reset_check, prob, + NLStats(1, 0, 0, 0, 0), + init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip))) +end + +function perform_step!(cache::LimitedMemoryBroydenCache{true}) + @unpack f, p, du, u = cache + T = eltype(u) + + α = perform_linesearch!(cache.lscache, u, du) + axpy!(α, du, u) + f(cache.fu2, u, p) + + cache.internalnorm(cache.fu2) < cache.abstol && (cache.force_stop = true) + cache.stats.nf += 1 + + cache.force_stop && return nothing + + # Update the Inverse Jacobian Approximation + cache.dfu .= cache.fu2 .- cache.fu + + # Only try to reset if we have enough iterations since last reset + if cache.iterations_since_reset > size(cache.U, 1) && + (all(cache.reset_check, du) || all(cache.reset_check, cache.dfu)) + if cache.resets ≥ cache.max_resets + cache.retcode = ReturnCode.Unstable + cache.force_stop = true + return nothing + end + cache.iterations_since_reset = 0 + cache.resets += 1 + cache.du .= -cache.fu + else + idx = min(cache.iterations_since_reset, size(cache.U, 1)) + U_part = selectdim(cache.U, 1, 1:idx) + Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx) + + __lbroyden_matvec!(_vec(cache.vᵀ_cache), cache.Ux, U_part, Vᵀ_part, _vec(cache.du)) + __lbroyden_rmatvec!(_vec(cache.u_cache), cache.xᵀVᵀ, U_part, Vᵀ_part, + _vec(cache.dfu)) + cache.u_cache .= (du .- cache.u_cache) ./ + (dot(cache.vᵀ_cache, cache.dfu) .+ T(1e-5)) + + idx = mod1(cache.iterations_since_reset + 1, size(cache.U, 1)) + selectdim(cache.U, 1, idx) .= _vec(cache.u_cache) + selectdim(cache.Vᵀ, 2, idx) .= _vec(cache.vᵀ_cache) + + idx = min(cache.iterations_since_reset + 1, size(cache.U, 1)) + U_part = selectdim(cache.U, 1, 1:idx) + Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx) + __lbroyden_matvec!(_vec(cache.du), cache.Ux, U_part, Vᵀ_part, _vec(cache.fu2)) + cache.du .*= -1 + cache.iterations_since_reset += 1 + end + + cache.fu .= cache.fu2 + + return nothing +end + +function perform_step!(cache::LimitedMemoryBroydenCache{false}) + @unpack f, p = cache + T = eltype(cache.u) + + α = perform_linesearch!(cache.lscache, cache.u, cache.du) + cache.u = cache.u .+ α * cache.du + cache.fu2 = f(cache.u, p) + + cache.internalnorm(cache.fu2) < cache.abstol && (cache.force_stop = true) + cache.stats.nf += 1 + + cache.force_stop && return nothing + + # Update the Inverse Jacobian Approximation + cache.dfu .= cache.fu2 .- cache.fu + + # Only try to reset if we have enough iterations since last reset + if cache.iterations_since_reset > size(cache.U, 1) && + (all(cache.reset_check, cache.du) || all(cache.reset_check, cache.dfu)) + if cache.resets ≥ cache.max_resets + cache.retcode = ReturnCode.Unstable + cache.force_stop = true + return nothing + end + cache.iterations_since_reset = 0 + cache.resets += 1 + cache.du = -cache.fu + else + idx = min(cache.iterations_since_reset, size(cache.U, 1)) + U_part = selectdim(cache.U, 1, 1:idx) + Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx) + + cache.vᵀ_cache = _restructure(cache.vᵀ_cache, + __lbroyden_matvec(U_part, Vᵀ_part, _vec(cache.du))) + cache.u_cache = _restructure(cache.u_cache, + __lbroyden_rmatvec(U_part, Vᵀ_part, _vec(cache.dfu))) + cache.u_cache = (cache.du .- cache.u_cache) ./ + (dot(cache.vᵀ_cache, cache.dfu) .+ T(1e-5)) + + idx = mod1(cache.iterations_since_reset + 1, size(cache.U, 1)) + selectdim(cache.U, 1, idx) .= _vec(cache.u_cache) + selectdim(cache.Vᵀ, 2, idx) .= _vec(cache.vᵀ_cache) + + idx = min(cache.iterations_since_reset + 1, size(cache.U, 1)) + U_part = selectdim(cache.U, 1, 1:idx) + Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx) + cache.du = _restructure(cache.du, + -__lbroyden_matvec(U_part, Vᵀ_part, _vec(cache.fu2))) + cache.iterations_since_reset += 1 + end + + cache.fu = cache.fu2 + + return nothing +end + +function SciMLBase.reinit!(cache::LimitedMemoryBroydenCache{iip}, u0 = cache.u; p = cache.p, + abstol = cache.abstol, maxiters = cache.maxiters) where {iip} + cache.p = p + if iip + recursivecopy!(cache.u, u0) + cache.f(cache.fu, cache.u, p) + else + # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter + cache.u = u0 + cache.fu = cache.f(cache.u, p) + end + cache.abstol = abstol + cache.maxiters = maxiters + cache.stats.nf = 1 + cache.stats.nsteps = 1 + cache.resets = 0 + cache.iterations_since_reset = 0 + cache.force_stop = false + cache.retcode = ReturnCode.Default + return cache +end + +@views function __lbroyden_matvec!(y::AbstractVector, Ux::AbstractVector, + U::AbstractMatrix, Vᵀ::AbstractMatrix, x::AbstractVector) + # Computes Vᵀ × U × x + η = size(U, 1) + if η == 0 + y .= x + return nothing + end + mul!(Ux[1:η], U, x) + mul!(y, Vᵀ[:, 1:η], Ux[1:η]) + return nothing +end + +@views function __lbroyden_matvec(U::AbstractMatrix, Vᵀ::AbstractMatrix, x::AbstractVector) + # Computes Vᵀ × U × x + size(U, 1) == 0 && return x + return Vᵀ * (U * x) +end + +@views function __lbroyden_rmatvec!(y::AbstractVector, xᵀVᵀ::AbstractMatrix, + U::AbstractMatrix, Vᵀ::AbstractMatrix, x::AbstractVector) + # Computes xᵀ × Vᵀ × U + η = size(U, 1) + if η == 0 + y .= x + return nothing + end + mul!(xᵀVᵀ[:, 1:η], x', Vᵀ) + mul!(y', xᵀVᵀ[:, 1:η], U) + return nothing +end + +@views function __lbroyden_rmatvec(U::AbstractMatrix, Vᵀ::AbstractMatrix, x::AbstractVector) + # Computes xᵀ × Vᵀ × U + size(U, 1) == 0 && return x + return (x' * Vᵀ) * U +end diff --git a/src/utils.jl b/src/utils.jl index 3369e5c01..c99b05bf4 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -222,6 +222,18 @@ function __init_identity_jacobian(u::StaticArray, fu) Matrix{eltype(u)}(I, length(fu), length(u))) end +function __init_low_rank_jacobian(u::StaticArray, fu, threshold::Int) + Vᵀ = convert(MArray{Tuple{length(u), threshold}}, + zeros(eltype(u), length(u), threshold)) + U = convert(MArray{Tuple{threshold, length(u)}}, zeros(eltype(u), threshold, length(u))) + return U, Vᵀ +end +function __init_low_rank_jacobian(u, fu, threshold::Int) + Vᵀ = convert(parameterless_type(_mutable(u)), zeros(eltype(u), length(u), threshold)) + U = convert(parameterless_type(_mutable(u)), zeros(eltype(u), threshold, length(u))) + return U, Vᵀ +end + # Check Singular Matrix _issingular(x::Number) = iszero(x) @generated function _issingular(x::T) where {T} diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl index 25bb8a68a..3564aa187 100644 --- a/test/23_test_problems.jl +++ b/test/23_test_problems.jl @@ -105,3 +105,5 @@ end test_on_library(problems, dicts, alg_ops, broken_tests) end + +# NOTE: Not adding LimitedMemoryBroyden here since it fails on most of the preoblems diff --git a/test/basictests.jl b/test/basictests.jl index a39d25656..681d506de 100644 --- a/test/basictests.jl +++ b/test/basictests.jl @@ -843,3 +843,93 @@ end @test nlprob_iterator_interface(quadratic_f, p, Val(false)) ≈ sqrt.(p) @test nlprob_iterator_interface(quadratic_f!, p, Val(true)) ≈ sqrt.(p) end + +# --- LimitedMemoryBroyden tests --- + +@testset "LimitedMemoryBroyden" begin + function benchmark_nlsolve_oop(f, u0, p = 2.0; linesearch = LineSearch()) + prob = NonlinearProblem{false}(f, u0, p) + return solve(prob, LimitedMemoryBroyden(; linesearch), abstol = 1e-9) + end + + function benchmark_nlsolve_iip(f, u0, p = 2.0; linesearch = LineSearch()) + prob = NonlinearProblem{true}(f, u0, p) + return solve(prob, LimitedMemoryBroyden(; linesearch), abstol = 1e-9) + end + + @testset "LineSearch: $(_nameof(lsmethod)) LineSearch AD: $(_nameof(ad))" for lsmethod in (Static(), + StrongWolfe(), BackTracking(), HagerZhang(), MoreThuente(), + LiFukushimaLineSearch()), + ad in (AutoFiniteDiff(), AutoZygote()) + + linesearch = LineSearch(; method = lsmethod, autodiff = ad) + u0s = ([1.0, 1.0], @SVector[1.0, 1.0], 1.0) + + @testset "[OOP] u0: $(typeof(u0))" for u0 in u0s + sol = benchmark_nlsolve_oop(quadratic_f, u0; linesearch) + @test SciMLBase.successful_retcode(sol) + @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9) + + cache = init(NonlinearProblem{false}(quadratic_f, u0, 2.0), + LimitedMemoryBroyden(; linesearch), abstol = 1e-9) + @test (@ballocated solve!($cache)) < 200 + end + + @testset "[IIP] u0: $(typeof(u0))" for u0 in ([1.0, 1.0],) + ad isa AutoZygote && continue + sol = benchmark_nlsolve_iip(quadratic_f!, u0; linesearch) + @test SciMLBase.successful_retcode(sol) + @test all(abs.(sol.u .* sol.u .- 2) .< 1e-9) + + cache = init(NonlinearProblem{true}(quadratic_f!, u0, 2.0), + LimitedMemoryBroyden(; linesearch), abstol = 1e-9) + @test (@ballocated solve!($cache)) ≤ 64 + end + end + + @testset "[OOP] [Immutable AD]" begin + for p in 1.0:0.1:100.0 + @test begin + res = benchmark_nlsolve_oop(quadratic_f, @SVector[1.0, 1.0], p) + res_true = sqrt(p) + all(((x, y),) -> isapprox(x, y; atol = 1e-3), zip(res.u, res_true)) + end + @test ForwardDiff.derivative(p -> benchmark_nlsolve_oop(quadratic_f, + @SVector[1.0, 1.0], p).u[end], p)≈1 / (2 * sqrt(p)) atol=1e-3 + end + end + + @testset "[OOP] [Scalar AD]" begin + for p in 1.0:0.1:100.0 + @test begin + res = benchmark_nlsolve_oop(quadratic_f, 1.0, p) + res_true = sqrt(p) + res.u ≈ res_true + end + @test ForwardDiff.derivative(p -> benchmark_nlsolve_oop(quadratic_f, 1.0, p).u, + p) ≈ 1 / (2 * sqrt(p)) + end + end + + t = (p) -> [sqrt(p[2] / p[1])] + p = [0.9, 50.0] + @test benchmark_nlsolve_oop(quadratic_f2, 0.5, p).u ≈ sqrt(p[2] / p[1]) + @test ForwardDiff.jacobian(p -> [benchmark_nlsolve_oop(quadratic_f2, 0.5, p).u], + p) ≈ ForwardDiff.jacobian(t, p) + + # Iterator interface + function nlprob_iterator_interface(f, p_range, ::Val{iip}) where {iip} + probN = NonlinearProblem{iip}(f, iip ? [0.5] : 0.5, p_range[begin]) + cache = init(probN, LimitedMemoryBroyden(); maxiters = 100, abstol = 1e-10) + sols = zeros(length(p_range)) + for (i, p) in enumerate(p_range) + reinit!(cache, iip ? [cache.u[1]] : cache.u; p = p) + sol = solve!(cache) + sols[i] = iip ? sol.u[1] : sol.u + end + return sols + end + p = range(0.01, 2, length = 200) + @test nlprob_iterator_interface(quadratic_f, p, Val(false))≈sqrt.(p) atol=1e-2 + @test nlprob_iterator_interface(quadratic_f!, p, Val(true))≈sqrt.(p) atol=1e-2 +end diff --git a/test/matrix_resizing.jl b/test/matrix_resizing.jl index 9c16fe0fe..1d9462fa1 100644 --- a/test/matrix_resizing.jl +++ b/test/matrix_resizing.jl @@ -7,7 +7,8 @@ vecprob = NonlinearProblem(ff, vec(u0), p) prob = NonlinearProblem(ff, u0, p) for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), - RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden(), GeneralKlement()) + RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden(), GeneralKlement(), + LimitedMemoryBroyden()) @test vec(solve(prob, alg).u) == solve(vecprob, alg).u end