diff --git a/docs/src/api/nonlinearsolve.md b/docs/src/api/nonlinearsolve.md index 5da204f07..3e6009017 100644 --- a/docs/src/api/nonlinearsolve.md +++ b/docs/src/api/nonlinearsolve.md @@ -16,7 +16,9 @@ GeneralKlement ## Polyalgorithms ```@docs +NonlinearSolvePolyAlgorithm FastShortcutNonlinearPolyalg +FastShortcutNLLSPolyalg RobustMultiNewton ``` diff --git a/docs/src/solvers/NonlinearLeastSquaresSolvers.md b/docs/src/solvers/NonlinearLeastSquaresSolvers.md index e414acdd8..1690671d9 100644 --- a/docs/src/solvers/NonlinearLeastSquaresSolvers.md +++ b/docs/src/solvers/NonlinearLeastSquaresSolvers.md @@ -7,7 +7,10 @@ Solves the nonlinear least squares problem defined by `prob` using the algorithm ## Recommended Methods -`LevenbergMarquardt` is a good choice for most problems. +The default method `FastShortcutNLLSPolyalg` is a good choice for most +problems. It is a polyalgorithm that attempts to use a fast algorithm +(`GaussNewton`) and if that fails it falls back to a more robust +algorithm (`LevenbergMarquardt`). ## Full List of Methods @@ -21,10 +24,3 @@ Solves the nonlinear least squares problem defined by `prob` using the algorithm problems. - `SimpleNewtonRaphson()`: Simple Gauss Newton Implementation with `QRFactorization` to solve a linear least squares problem at each step! - -## Example usage - -```julia -using NonlinearSolve -sol = solve(prob, LevenbergMarquardt()) -``` diff --git a/ext/NonlinearSolveFastLevenbergMarquardtExt.jl b/ext/NonlinearSolveFastLevenbergMarquardtExt.jl index 08d719f96..e292e29c6 100644 --- a/ext/NonlinearSolveFastLevenbergMarquardtExt.jl +++ b/ext/NonlinearSolveFastLevenbergMarquardtExt.jl @@ -32,9 +32,10 @@ end (f::InplaceFunction{false})(fx, x, p) = (fx .= f.f(x, p)) function SciMLBase.__init(prob::NonlinearLeastSquaresProblem, - alg::FastLevenbergMarquardtJL, args...; abstol = 1e-8, reltol = 1e-8, - verbose = false, maxiters = 1000, kwargs...) + alg::FastLevenbergMarquardtJL, args...; alias_u0 = false, abstol = 1e-8, + reltol = 1e-8, verbose = false, maxiters = 1000, kwargs...) iip = SciMLBase.isinplace(prob) + u0 = alias_u0 ? prob.u0 : deepcopy(prob.u0) @assert prob.f.jac!==nothing "FastLevenbergMarquardt requires a Jacobian!" @@ -42,13 +43,13 @@ function SciMLBase.__init(prob::NonlinearLeastSquaresProblem, J! = InplaceFunction{iip}(prob.f.jac) resid_prototype = prob.f.resid_prototype === nothing ? - (!iip ? prob.f(prob.u0, prob.p) : zeros(prob.u0)) : + (!iip ? prob.f(u0, prob.p) : zeros(u0)) : prob.f.resid_prototype - J = similar(prob.u0, length(resid_prototype), length(prob.u0)) + J = similar(u0, length(resid_prototype), length(u0)) - solver = _fast_lm_solver(alg, prob.u0) - LM = FastLM.LMWorkspace(prob.u0, resid_prototype, J) + solver = _fast_lm_solver(alg, u0) + LM = FastLM.LMWorkspace(u0, resid_prototype, J) return FastLevenbergMarquardtJLCache(f!, J!, prob, alg, LM, solver, (; xtol = abstol, ftol = reltol, maxit = maxiters, alg.factor, alg.factoraccept, diff --git a/ext/NonlinearSolveLeastSquaresOptimExt.jl b/ext/NonlinearSolveLeastSquaresOptimExt.jl index 5cf6ab67d..63ce7d0cc 100644 --- a/ext/NonlinearSolveLeastSquaresOptimExt.jl +++ b/ext/NonlinearSolveLeastSquaresOptimExt.jl @@ -33,17 +33,19 @@ end (f::FunctionWrapper{false})(du, u) = (du .= f.f(u, f.p)) function SciMLBase.__init(prob::NonlinearLeastSquaresProblem, alg::LeastSquaresOptimJL, - args...; abstol = 1e-8, reltol = 1e-8, verbose = false, maxiters = 1000, kwargs...) + args...; alias_u0 = false, abstol = 1e-8, reltol = 1e-8, verbose = false, + maxiters = 1000, kwargs...) iip = SciMLBase.isinplace(prob) + u = alias_u0 ? prob.u0 : deepcopy(prob.u0) f! = FunctionWrapper{iip}(prob.f, prob.p) g! = prob.f.jac === nothing ? nothing : FunctionWrapper{iip}(prob.f.jac, prob.p) resid_prototype = prob.f.resid_prototype === nothing ? - (!iip ? prob.f(prob.u0, prob.p) : zeros(prob.u0)) : + (!iip ? prob.f(u, prob.p) : zeros(u)) : prob.f.resid_prototype - lsoprob = LSO.LeastSquaresProblem(; x = prob.u0, f!, y = resid_prototype, g!, + lsoprob = LSO.LeastSquaresProblem(; x = u, f!, y = resid_prototype, g!, J = prob.f.jac_prototype, alg.autodiff, output_length = length(resid_prototype)) allocated_prob = LSO.LeastSquaresProblemAllocated(lsoprob, _lso_solver(alg)) diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 39210cd3a..5c97b8340 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -147,7 +147,8 @@ export RadiusUpdateSchemes export NewtonRaphson, TrustRegion, LevenbergMarquardt, DFSane, GaussNewton, PseudoTransient, GeneralBroyden, GeneralKlement, LimitedMemoryBroyden export LeastSquaresOptimJL, FastLevenbergMarquardtJL -export NonlinearSolvePolyAlgorithm, RobustMultiNewton, FastShortcutNonlinearPolyalg +export NonlinearSolvePolyAlgorithm, + RobustMultiNewton, FastShortcutNonlinearPolyalg, FastShortcutNLLSPolyalg export LineSearch, LiFukushimaLineSearch diff --git a/src/default.jl b/src/default.jl index 758e049f3..e1138e8fb 100644 --- a/src/default.jl +++ b/src/default.jl @@ -250,14 +250,57 @@ function FastShortcutNonlinearPolyalg(; concrete_jac = nothing, linsolve = nothi return NonlinearSolvePolyAlgorithm(algs, Val(:NLS)) end +""" + FastShortcutNLLSPolyalg(; concrete_jac = nothing, linsolve = nothing, + precs = DEFAULT_PRECS, adkwargs...) + +A polyalgorithm focused on balancing speed and robustness. It first tries less robust methods +for more performance and then tries more robust techniques if the faster ones fail. + +### Keyword Arguments + + - `autodiff`: determines the backend used for the Jacobian. Note that this argument is + ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to + `AutoForwardDiff()`. Valid choices are types from ADTypes.jl. + - `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used, + then the Jacobian will not be constructed and instead direct Jacobian-vector products + `J*v` are computed using forward-mode automatic differentiation or finite differencing + tricks (without ever constructing the Jacobian). However, if the Jacobian is still needed, + for example for a preconditioner, `concrete_jac = true` can be passed in order to force + the construction of the Jacobian. + - `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the + linear solves within the Newton method. Defaults to `nothing`, which means it uses the + LinearSolve.jl default algorithm choice. For more information on available algorithm + choices, see the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). + - `precs`: the choice of preconditioners for the linear solver. Defaults to using no + preconditioners. For more information on specifying preconditioners for LinearSolve + algorithms, consult the + [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). +""" +function FastShortcutNLLSPolyalg(; concrete_jac = nothing, linsolve = nothing, + precs = DEFAULT_PRECS, adkwargs...) + algs = (GaussNewton(; concrete_jac, linsolve, precs, adkwargs...), + GaussNewton(; concrete_jac, linsolve, precs, linesearch = BackTracking(), + adkwargs...), + LevenbergMarquardt(; concrete_jac, linsolve, precs, adkwargs...)) + return NonlinearSolvePolyAlgorithm(algs, Val(:NLLS)) +end + ## Defaults -function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::Nothing, args...; - kwargs...) where {uType, iip} - SciMLBase.__init(prob, FastShortcutNonlinearPolyalg(), args...; kwargs...) +function SciMLBase.__init(prob::NonlinearProblem, ::Nothing, args...; kwargs...) + return SciMLBase.__init(prob, FastShortcutNonlinearPolyalg(), args...; kwargs...) +end + +function SciMLBase.__solve(prob::NonlinearProblem, ::Nothing, args...; kwargs...) + return SciMLBase.__solve(prob, FastShortcutNonlinearPolyalg(), args...; kwargs...) +end + +function SciMLBase.__init(prob::NonlinearLeastSquaresProblem, ::Nothing, args...; kwargs...) + return SciMLBase.__init(prob, FastShortcutNLLSPolyalg(), args...; kwargs...) end -function SciMLBase.__solve(prob::NonlinearProblem{uType, iip}, alg::Nothing, args...; - kwargs...) where {uType, iip} - SciMLBase.__solve(prob, FastShortcutNonlinearPolyalg(), args...; kwargs...) +function SciMLBase.__solve(prob::NonlinearLeastSquaresProblem, ::Nothing, args...; + kwargs...) + return SciMLBase.__solve(prob, FastShortcutNLLSPolyalg(), args...; kwargs...) end diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl index b77d89087..72843c42e 100644 --- a/test/23_test_problems.jl +++ b/test/23_test_problems.jl @@ -85,7 +85,7 @@ end # Broyden and Klement Tests are quite flaky and failure seems to be platform dependent # needs additional investigation before we can enable them @testset "GeneralBroyden 23 Test Problems" begin - alg_ops = (GeneralBroyden(),) + alg_ops = (GeneralBroyden(; max_resets = 10),) broken_tests = Dict(alg => Int[] for alg in alg_ops) broken_tests[alg_ops[1]] = [1, 2, 4, 5, 6, 11, 12, 13, 14] diff --git a/test/nonlinear_least_squares.jl b/test/nonlinear_least_squares.jl index 07a310196..7b8354a9d 100644 --- a/test/nonlinear_least_squares.jl +++ b/test/nonlinear_least_squares.jl @@ -12,12 +12,12 @@ y_target = true_function(x, θ_true) function loss_function(θ, p) ŷ = true_function(p, θ) - return abs2.(ŷ .- y_target) + return ŷ .- y_target end function loss_function(resid, θ, p) true_function(resid, p, θ) - resid .= abs2.(resid .- y_target) + resid .= resid .- y_target return resid end @@ -36,6 +36,7 @@ append!(solvers, LevenbergMarquardt(; linsolve = LUFactorization()), LeastSquaresOptimJL(:lm), LeastSquaresOptimJL(:dogleg), + nothing, ]) for prob in nlls_problems, solver in solvers @@ -45,7 +46,8 @@ for prob in nlls_problems, solver in solvers end function jac!(J, θ, p) - ForwardDiff.jacobian!(J, resid -> loss_function(resid, θ, p), θ) + resid = zeros(length(p)) + ForwardDiff.jacobian!(J, (resid, θ) -> loss_function(resid, θ, p), resid, θ) return J end @@ -56,6 +58,5 @@ solvers = [FastLevenbergMarquardtJL(:cholesky), FastLevenbergMarquardtJL(:qr)] for solver in solvers @time sol = solve(prob, solver; maxiters = 10000, abstol = 1e-8) - @test SciMLBase.successful_retcode(sol) @test norm(sol.resid) < 1e-6 end