Select the correct AD Type based on problem specification

SciML · Oct 17, 2023 · b17b421 · b17b421
1 parent ff604ee
commit b17b421
Show file tree

Hide file tree

Showing 6 changed files with 92 additions and 26 deletions.
diff --git a/src/gaussnewton.jl b/src/gaussnewton.jl
@@ -14,7 +14,8 @@ for large-scale and numerically-difficult nonlinear least squares problems.
 
   - `autodiff`: determines the backend used for the Jacobian. Note that this argument is
     ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to
-    `AutoForwardDiff()`. Valid choices are types from ADTypes.jl.
+    `nothing` which means that a default is selected according to the problem specification!
+    Valid choices are types from ADTypes.jl.
   - `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used,
     then the Jacobian will not be constructed and instead direct Jacobian-vector products
     `J*v` are computed using forward-mode automatic differentiation or finite differencing
@@ -41,6 +42,10 @@ for large-scale and numerically-difficult nonlinear least squares problems.
     precs
 end
 
+function set_ad(alg::GaussNewton{CJ}, ad) where {CJ}
+    return GaussNewton{CJ}(ad, alg.linsolve, alg.precs)
+end
+
 function GaussNewton(; concrete_jac = nothing, linsolve = CholeskyFactorization(),
     precs = DEFAULT_PRECS, adkwargs...)
     ad = default_adargs_to_adtype(; adkwargs...)
@@ -71,9 +76,10 @@ end
     stats::NLStats
 end
 
-function SciMLBase.__init(prob::NonlinearLeastSquaresProblem{uType, iip}, alg::GaussNewton,
+function SciMLBase.__init(prob::NonlinearLeastSquaresProblem{uType, iip}, alg_::GaussNewton,
     args...; alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM,
     kwargs...) where {uType, iip}
+    alg = get_concrete_algorithm(alg_, prob)
     @unpack f, u0, p = prob
     u = alias_u0 ? u0 : deepcopy(u0)
     if iip

diff --git a/src/levenberg.jl b/src/levenberg.jl
@@ -14,7 +14,8 @@ numerically-difficult nonlinear systems.
 
   - `autodiff`: determines the backend used for the Jacobian. Note that this argument is
     ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to
-    `AutoForwardDiff()`. Valid choices are types from ADTypes.jl.
+    `nothing` which means that a default is selected according to the problem specification!
+    Valid choices are types from ADTypes.jl.
   - `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used,
     then the Jacobian will not be constructed and instead direct Jacobian-vector products
     `J*v` are computed using forward-mode automatic differentiation or finite differencing
@@ -86,6 +87,12 @@ numerically-difficult nonlinear systems.
     min_damping_D::T
 end
 
+function set_ad(alg::LevenbergMarquardt{CJ}, ad) where {CJ}
+    return LevenbergMarquardt{CJ}(ad, alg.linsolve, alg.precs, alg.damping_initial,
+        alg.damping_increase_factor, alg.damping_decrease_factor,
+        alg.finite_diff_step_geodesic, alg.α_geodesic, alg.b_uphill, alg.min_damping_D)
+end
+
 function LevenbergMarquardt(; concrete_jac = nothing, linsolve = nothing,
     precs = DEFAULT_PRECS, damping_initial::Real = 1.0, damping_increase_factor::Real = 2.0,
     damping_decrease_factor::Real = 3.0, finite_diff_step_geodesic::Real = 0.1,
@@ -141,9 +148,10 @@ end
 end
 
 function SciMLBase.__init(prob::Union{NonlinearProblem{uType, iip},
-        NonlinearLeastSquaresProblem{uType, iip}}, alg::LevenbergMarquardt,
+        NonlinearLeastSquaresProblem{uType, iip}}, alg_::LevenbergMarquardt,
     args...; alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM,
     linsolve_kwargs = (;), kwargs...) where {uType, iip}
+    alg = get_concrete_algorithm(alg_, prob)
     @unpack f, u0, p = prob
     u = alias_u0 ? u0 : deepcopy(u0)
     fu1 = evaluate_f(prob, u)

diff --git a/src/raphson.jl b/src/raphson.jl
@@ -10,7 +10,8 @@ for large-scale and numerically-difficult nonlinear systems.
 
   - `autodiff`: determines the backend used for the Jacobian. Note that this argument is
     ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to
-    `AutoForwardDiff()`. Valid choices are types from ADTypes.jl.
+    `nothing` which means that a default is selected according to the problem specification!
+    Valid choices are types from ADTypes.jl.
   - `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used,
     then the Jacobian will not be constructed and instead direct Jacobian-vector products
     `J*v` are computed using forward-mode automatic differentiation or finite differencing
@@ -36,6 +37,10 @@ for large-scale and numerically-difficult nonlinear systems.
     linesearch
 end
 
+function set_ad(alg::NewtonRaphson{CJ}, ad) where {CJ}
+    return NewtonRaphson{CJ}(ad, alg.linsolve, alg.precs, alg.linesearch)
+end
+
 function NewtonRaphson(; concrete_jac = nothing, linsolve = nothing,
     linesearch = LineSearch(), precs = DEFAULT_PRECS, adkwargs...)
     ad = default_adargs_to_adtype(; adkwargs...)
@@ -65,9 +70,10 @@ end
     lscache
 end
 
-function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::NewtonRaphson, args...;
+function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::NewtonRaphson, args...;
     alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM,
     linsolve_kwargs = (;), kwargs...) where {uType, iip}
+    alg = get_concrete_algorithm(alg_, prob)
     @unpack f, u0, p = prob
     u = alias_u0 ? u0 : deepcopy(u0)
     fu1 = evaluate_f(prob, u)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
@@ -97,7 +97,8 @@ for large-scale and numerically-difficult nonlinear systems.
 
   - `autodiff`: determines the backend used for the Jacobian. Note that this argument is
     ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to
-    `AutoForwardDiff()`. Valid choices are types from ADTypes.jl.
+    `nothing` which means that a default is selected according to the problem specification!.
+    Valid choices are types from ADTypes.jl.
   - `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used,
     then the Jacobian will not be constructed and instead direct Jacobian-vector products
     `J*v` are computed using forward-mode automatic differentiation or finite differencing
@@ -162,6 +163,13 @@ for large-scale and numerically-difficult nonlinear systems.
     max_shrink_times::Int
 end
 
+function set_ad(alg::TrustRegion{CJ}, ad) where {CJ}
+    return TrustRegion{CJ}(ad, alg.linsolve, alg.precs, alg.radius_update_scheme,
+        alg.max_trust_radius, alg.initial_trust_radius, alg.step_threshold,
+        alg.shrink_threshold, alg.expand_threshold, alg.shrink_factor, alg.expand_factor,
+        alg.max_shrink_times)
+end
+
 function TrustRegion(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS,
     radius_update_scheme::RadiusUpdateSchemes.T = RadiusUpdateSchemes.Simple, #defaults to conventional radius update
     max_trust_radius::Real = 0 // 1, initial_trust_radius::Real = 0 // 1,
@@ -222,9 +230,10 @@ end
     stats::NLStats
 end
 
-function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion, args...;
+function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::TrustRegion, args...;
     alias_u0 = false, maxiters = 1000, abstol = 1e-8, internalnorm = DEFAULT_NORM,
     linsolve_kwargs = (;), kwargs...) where {uType, iip}
+    alg = get_concrete_algorithm(alg_, prob)
     @unpack f, u0, p = prob
     u = alias_u0 ? u0 : deepcopy(u0)
     u_prev = zero(u)

diff --git a/src/utils.jl b/src/utils.jl
@@ -13,28 +13,41 @@ end
 @inline DEFAULT_NORM(u::AbstractArray) = sqrt(real(sum(UNITLESS_ABS2, u)) / length(u))
 @inline DEFAULT_NORM(u) = norm(u)
 
-alg_autodiff(alg::AbstractNewtonAlgorithm{<:AbstractFiniteDifferencesMode}) = false
-alg_autodiff(alg::AbstractNewtonAlgorithm) = true
-alg_autodiff(alg) = false
-
 """
     default_adargs_to_adtype(; chunk_size = Val{0}(), autodiff = Val{true}(),
         standardtag = Val{true}(), diff_type = Val{:forward})
 
 Construct the AD type from the arguments. This is mostly needed for compatibility with older
 code.
+
+!!! warning
+    `chunk_size`, `standardtag`, `diff_type`, and `autodiff::Union{Val, Bool}` are
+    deprecated and will be removed in v3. Update your code to directly specify
+    `autodiff=<ADTypes>`.
 """
-function default_adargs_to_adtype(; chunk_size = Val{0}(), autodiff = Val{true}(),
-    standardtag = Val{true}(), diff_type = Val{:forward}())
-    ad = _unwrap_val(autodiff)
-    # Old API
-    if ad isa Bool
-        # FIXME: standardtag is not the Tag
-        ad && return AutoForwardDiff(; chunksize = _unwrap_val(chunk_size),
-            tag = _unwrap_val(standardtag))
-        return AutoFiniteDiff(; fdtype = diff_type)
+function default_adargs_to_adtype(; chunk_size = missing, autodiff = nothing,
+    standardtag = missing, diff_type = missing)
+    # If using the new API short circuit
+    autodiff === nothing && return nothing
+    autodiff isa ADTypes.AbstractADType && return autodiff
+
+    # Deprecate the old version
+    if chunk_size !== missing || standardtag !== missing || diff_type !== missing ||
+       autodiff !== missing
+        Base.depwarn("`chunk_size`, `standardtag`, `diff_type`, \
+            `autodiff::Union{Val, Bool}` kwargs have been deprecated and will be removed in\
+             v3. Update your code to directly specify autodiff=<ADTypes>",
+            :default_adargs_to_adtype)
     end
-    return ad
+    chunk_size === missing && (chunk_size = Val{0}())
+    standardtag === missing && (standardtag = Val{true}())
+    diff_type === missing && (diff_type = Val{:forward}())
+    autodiff === missing && (autodiff = Val{true}())
+
+    ad = _unwrap_val(autodiff)
+    tag = _unwrap_val(standardtag)
+    ad && return AutoForwardDiff{_unwrap_val(chunk_size), typeof(tag)}(tag)
+    return AutoFiniteDiff(; fdtype = diff_type)
 end
 
 """
@@ -171,3 +184,27 @@ Defaults to `mul!(C, A, B)`. However, for sparse matrices uses `C .= A * B`.
 """
 __matmul!(C, A, B) = mul!(C, A, B)
 __matmul!(C::AbstractSparseMatrix, A, B) = C .= A * B
+
+# Concretize Algorithms
+function get_concrete_algorithm(alg, prob)
+    !hasfield(typeof(alg), :ad) && return alg
+    alg.ad isa ADTypes.AbstractADType && return alg
+
+    # Figure out the default AD
+    # Now that we have handed trivial cases, we can allow extending this function
+    # for specific algorithms
+    return __get_concrete_algorithm(alg, prob)
+end
+
+function __get_concrete_algorithm(alg, prob)
+    @unpack sparsity, jac_prototype = prob.f
+    use_sparse_ad = sparsity !== nothing || jac_prototype !== nothing
+    ad = if eltype(prob.u0) <: Complex
+        # Use Finite Differencing
+        use_sparse_ad ? AutoSparseFiniteDiff() : AutoFiniteDiff()
+    else
+        use_sparse_ad ? AutoSparseForwardDiff() :
+        AutoForwardDiff{ForwardDiff.pickchunksize(length(prob.u0)), Nothing}(nothing)
+    end
+    return set_ad(alg, ad)
+end
diff --git a/test/polyalgs.jl b/test/polyalgs.jl
@@ -2,7 +2,7 @@ using NonlinearSolve, Test
 
 f(u, p) = u .* u .- 2
 u0 = [1.0, 1.0]
-probN = NonlinearProblem(f, u0)
+probN = NonlinearProblem{false}(f, u0)
 
 # Uses the `__solve` function
 @time solver = solve(probN; abstol = 1e-9)
@@ -13,13 +13,13 @@ probN = NonlinearProblem(f, u0)
 @test SciMLBase.successful_retcode(solver)
 
 # Test the caching interface
-cache = init(probN; abstol = 1e-9)
+cache = init(probN; abstol = 1e-9);
 @time solver = solve!(cache)
 @test SciMLBase.successful_retcode(solver)
-cache = init(probN, RobustMultiNewton(); abstol = 1e-9)
+cache = init(probN, RobustMultiNewton(); abstol = 1e-9);
 @time solver = solve!(cache)
 @test SciMLBase.successful_retcode(solver)
-cache = init(probN, FastShortcutNonlinearPolyalg(); abstol = 1e-9)
+cache = init(probN, FastShortcutNonlinearPolyalg(); abstol = 1e-9);
 @time solver = solve!(cache)
 @test SciMLBase.successful_retcode(solver)