diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml index 9c7935911..320e0c073 100644 --- a/.JuliaFormatter.toml +++ b/.JuliaFormatter.toml @@ -1,2 +1,3 @@ style = "sciml" -format_markdown = true \ No newline at end of file +format_markdown = true +annotate_untyped_fields_with_any = false diff --git a/Project.toml b/Project.toml index ed0b27f95..db9ad0d35 100644 --- a/Project.toml +++ b/Project.toml @@ -1,10 +1,12 @@ name = "NonlinearSolve" uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec" authors = ["SciML"] -version = "1.10.0" +version = "1.11.0" [deps] +ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" +ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471" DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56" FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index cae730bc3..38a4b6142 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -1,38 +1,41 @@ module NonlinearSolve -if isdefined(Base, :Experimental) && - isdefined(Base.Experimental, Symbol("@max_methods")) + +if isdefined(Base, :Experimental) && isdefined(Base.Experimental, Symbol("@max_methods")) @eval Base.Experimental.@max_methods 1 end -using Reexport -using UnPack: @unpack -using FiniteDiff, ForwardDiff -using ForwardDiff: Dual -using LinearAlgebra -using StaticArraysCore -using RecursiveArrayTools -import EnumX -import ArrayInterface -import LinearSolve -using DiffEqBase -using SparseDiffTools - -@reexport using SciMLBase -using SciMLBase: NLStats -@reexport using SimpleNonlinearSolve - -import SciMLBase: _unwrap_val - -abstract type AbstractNonlinearSolveAlgorithm <: SciMLBase.AbstractNonlinearAlgorithm end -abstract type AbstractNewtonAlgorithm{CS, AD, FDT, ST, CJ} <: - AbstractNonlinearSolveAlgorithm end - -function SciMLBase.__solve(prob::NonlinearProblem, - alg::AbstractNonlinearSolveAlgorithm, args...; - kwargs...) + +using DiffEqBase, LinearAlgebra, LinearSolve, SparseDiffTools +import ForwardDiff + +import ADTypes: AbstractFiniteDifferencesMode +import ArrayInterface: undefmatrix +import ConcreteStructs: @concrete +import EnumX: @enumx +import ForwardDiff: Dual +import LinearSolve: ComposePreconditioner, InvPreconditioner, needs_concrete_A +import RecursiveArrayTools: AbstractVectorOfArray, recursivecopy!, recursivefill! +import Reexport: @reexport +import SciMLBase: AbstractNonlinearAlgorithm, NLStats, _unwrap_val, has_jac, isinplace +import SparseDiffTools: __init_𝒥 +import StaticArraysCore: StaticArray, SVector +import UnPack: @unpack + +@reexport using ADTypes, SciMLBase, SimpleNonlinearSolve + +const AbstractSparseADType = Union{ADTypes.AbstractSparseFiniteDifferences, + ADTypes.AbstractSparseForwardMode, ADTypes.AbstractSparseReverseMode} + +abstract type AbstractNonlinearSolveAlgorithm <: AbstractNonlinearAlgorithm end +abstract type AbstractNewtonAlgorithm{CJ, AD} <: AbstractNonlinearSolveAlgorithm end + +function SciMLBase.__solve(prob::NonlinearProblem, alg::AbstractNonlinearSolveAlgorithm, + args...; kwargs...) cache = init(prob, alg, args...; kwargs...) 
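    # Illustrative usage sketch (not part of this patch): `__solve` above simply chains the
    # user-facing `init`/`solve!` cache API. Problem, algorithm, and tolerance below are
    # made-up placeholder values.
    #   prob = NonlinearProblem{false}((u, p) -> u .* u .- p, [1.0, 1.0], 2.0)
    #   cache = init(prob, NewtonRaphson(); abstol = 1e-9)
    #   sol = solve!(cache)  # equivalent to solve(prob, NewtonRaphson(); abstol = 1e-9)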
- sol = solve!(cache) + return solve!(cache) end +# FIXME: Scalar Case is Completely Broken + include("utils.jl") include("raphson.jl") include("trustRegion.jl") @@ -44,23 +47,23 @@ import PrecompileTools PrecompileTools.@compile_workload begin for T in (Float32, Float64) - prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2)) + # prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2)) - precompile_algs = if VERSION >= v"1.7" - (NewtonRaphson(), TrustRegion(), LevenbergMarquardt()) - else - (NewtonRaphson(),) - end + # precompile_algs = if VERSION ≥ v"1.7" + # (NewtonRaphson(), TrustRegion(), LevenbergMarquardt()) + # else + # (NewtonRaphson(),) + # end - for alg in precompile_algs - solve(prob, alg, abstol = T(1e-2)) - end + # for alg in precompile_algs + # solve(prob, alg, abstol = T(1e-2)) + # end prob = NonlinearProblem{true}((du, u, p) -> du[1] = u[1] * u[1] - p[1], T[0.1], T[2]) - for alg in precompile_algs - solve(prob, alg, abstol = T(1e-2)) - end + # for alg in precompile_algs + # solve(prob, alg, abstol = T(1e-2)) + # end end end diff --git a/src/ad.jl b/src/ad.jl index 0dad74c56..faa8c9f04 100644 --- a/src/ad.jl +++ b/src/ad.jl @@ -23,22 +23,17 @@ function scalar_nlsolve_ad(prob, alg, args...; kwargs...) return sol, partials end -function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector}, - iip, - <:Dual{T, V, P}}, - alg::AbstractNewtonAlgorithm, - args...; kwargs...) where {iip, T, V, P} +function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, SVector}, iip, + <:Dual{T, V, P}}, alg::AbstractNewtonAlgorithm, args...; + kwargs...) where {iip, T, V, P} sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...) return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid; - retcode = sol.retcode) + sol.retcode) end -function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector}, - iip, - <:AbstractArray{<:Dual{T, V, P}}}, - alg::AbstractNewtonAlgorithm, - args...; +function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, SVector}, iip, + <:AbstractArray{<:Dual{T, V, P}}}, alg::AbstractNewtonAlgorithm, args...; kwargs...) where {iip, T, V, P} sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...) return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid; - retcode = sol.retcode) + sol.retcode) end diff --git a/src/jacobian.jl b/src/jacobian.jl index 8296069e0..dfa8b1212 100644 --- a/src/jacobian.jl +++ b/src/jacobian.jl @@ -1,120 +1,72 @@ -struct JacobianWrapper{fType, pType} - f::fType - p::pType +@concrete struct JacobianWrapper + f + p end (uf::JacobianWrapper)(u) = uf.f(u, uf.p) (uf::JacobianWrapper)(res, u) = uf.f(res, u, uf.p) -struct NonlinearSolveTag end - -function sparsity_colorvec(f, x) - sparsity = f.sparsity - colorvec = DiffEqBase.has_colorvec(f) ? f.colorvec : - (isnothing(sparsity) ? (1:length(x)) : matrix_colors(sparsity)) - sparsity, colorvec -end - -function jacobian_finitediff_forward!(J, f, x, jac_config, forwardcache, cache) - (FiniteDiff.finite_difference_jacobian!(J, f, x, jac_config, forwardcache); - maximum(jac_config.colorvec)) -end -function jacobian_finitediff!(J, f, x, jac_config, cache) - (FiniteDiff.finite_difference_jacobian!(J, f, x, jac_config); - 2 * maximum(jac_config.colorvec)) -end +# function sparsity_colorvec(f, x) +# sparsity = f.sparsity +# colorvec = DiffEqBase.has_colorvec(f) ? f.colorvec : +# (isnothing(sparsity) ? 
(1:length(x)) : matrix_colors(sparsity)) +# sparsity, colorvec +# end # NoOp for Jacobian if it is not a Abstract Array -- For eg, JacVec Operator -jacobian!(J, cache) = J -function jacobian!(J::AbstractMatrix{<:Number}, cache) - f = cache.f - uf = cache.uf - x = cache.u - fx = cache.fu - jac_config = cache.jac_config - alg = cache.alg - - if SciMLBase.has_jac(f) - f.jac(J, x, cache.p) - elseif alg_autodiff(alg) - forwarddiff_color_jacobian!(J, uf, x, jac_config) - #cache.destats.nf += 1 +jacobian!!(J, _) = J +# `!!` notation is from BangBang.jl since J might be jacobian in case of oop `f.jac` +# and we don't want wasteful `copyto!` +function jacobian!!(J::Union{AbstractMatrix{<:Number}, Nothing}, cache) + @unpack f, uf, u, p, jac_cache, alg, fu2 = cache + iip = isinplace(cache) + if iip + has_jac(f) ? f.jac(J, u, p) : sparse_jacobian!(J, alg.ad, jac_cache, uf, fu2, u) else - isforward = alg_difftype(alg) === Val{:forward} - if isforward - uf(fx, x) - #cache.destats.nf += 1 - tmp = jacobian_finitediff_forward!(J, uf, x, jac_config, fx, - cache) - else # not forward difference - tmp = jacobian_finitediff!(J, uf, x, jac_config, cache) - end - #cache.destats.nf += tmp + return has_jac(f) ? f.jac(u, p) : sparse_jacobian!(J, alg.ad, jac_cache, uf, u) end - nothing + return nothing end -function build_jac_and_jac_config(alg, f::F1, uf::F2, du1, u, tmp, du2) where {F1, F2} +# Build Jacobian Caches +function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f, u, p, + ::Val{iip}) where {iip} + uf = JacobianWrapper(f, p) + haslinsolve = hasfield(typeof(alg), :linsolve) - has_analytic_jac = SciMLBase.has_jac(f) + has_analytic_jac = has_jac(f) linsolve_needs_jac = (concrete_jac(alg) === nothing && (!haslinsolve || (haslinsolve && (alg.linsolve === nothing || - LinearSolve.needs_concrete_A(alg.linsolve))))) - alg_wants_jac = (concrete_jac(alg) !== nothing && concrete_jac(alg)) + needs_concrete_A(alg.linsolve))))) + alg_wants_jac = (concrete_jac(alg) !== nothing && concrete_jac(alg)) + fu = zero(u) # TODO: Use Prototype if !has_analytic_jac && (linsolve_needs_jac || alg_wants_jac) - sparsity, colorvec = sparsity_colorvec(f, u) - - if alg_autodiff(alg) - _chunksize = get_chunksize(alg) === Val(0) ? nothing : get_chunksize(alg) # SparseDiffEq uses different convection... - - T = if standardtag(alg) - typeof(ForwardDiff.Tag(NonlinearSolveTag(), eltype(u))) - else - typeof(ForwardDiff.Tag(uf, eltype(u))) - end - jac_config = ForwardColorJacCache(uf, u, _chunksize; colorvec, sparsity, - tag = T) - else - if alg_difftype(alg) !== Val{:complex} - jac_config = FiniteDiff.JacobianCache(tmp, du1, du2, alg_difftype(alg); - colorvec, sparsity) - else - jac_config = FiniteDiff.JacobianCache(Complex{eltype(tmp)}.(tmp), - Complex{eltype(du1)}.(du1), nothing, alg_difftype(alg), eltype(u); - colorvec, sparsity) - end - end + # TODO: We need an Upstream Mode to allow using known sparsity and colorvec + # TODO: We can use the jacobian prototype here + sd = typeof(alg.ad) <: AbstractSparseADType ? SymbolicsSparsityDetection() : + NoSparsityDetection() + jac_cache = iip ? sparse_jacobian_cache(alg.ad, sd, uf, fu, u) : + sparse_jacobian_cache(alg.ad, sd, uf, u; fx = fu) else - jac_config = nothing + jac_cache = nothing end J = if !linsolve_needs_jac # We don't need to construct the Jacobian - JacVec(uf, u; autodiff = alg_autodiff(alg) ? AutoForwardDiff() : AutoFiniteDiff()) + JacVec(uf, u; autodiff = alg.ad) else - if f.jac_prototype === nothing - ArrayInterface.undefmatrix(u) + if has_analytic_jac + iip ?
undefmatrix(u) : nothing else - f.jac_prototype + f.jac_prototype === nothing ? __init_𝒥(jac_cache) : f.jac_prototype end end - return J, jac_config -end - -# Build Jacobian Caches -function jacobian_caches(alg::Union{NewtonRaphson, LevenbergMarquardt, TrustRegion}, f, u, - p, ::Val{true}) - uf = JacobianWrapper(f, p) - - du1 = zero(u) - du2 = zero(u) - tmp = zero(u) - J, jac_config = build_jac_and_jac_config(alg, f, uf, du1, u, tmp, du2) - + # FIXME: Assumes same sized `u` and `fu` -- Incorrect Assumption for Levenberg linprob = LinearProblem(J, _vec(zero(u)); u0 = _vec(zero(u))) + weight = similar(u) recursivefill!(weight, true) @@ -122,64 +74,5 @@ function jacobian_caches(alg::Union{NewtonRaphson, LevenbergMarquardt, TrustRegi nothing)..., weight) linsolve = init(linprob, alg.linsolve; alias_A = true, alias_b = true, Pl, Pr) - uf, linsolve, J, du1, jac_config -end - -function get_chunksize(jac_config::ForwardDiff.JacobianConfig{ - T, - V, - N, - D, -}) where {T, V, N, D -} - Val(N) -end # don't degrade compile time information to runtime information - -function jacobian_finitediff(f, x, ::Type{diff_type}, dir, colorvec, sparsity, - jac_prototype) where {diff_type} - (FiniteDiff.finite_difference_derivative(f, x, diff_type, eltype(x), dir = dir), 2) -end -function jacobian_finitediff(f, x::AbstractArray, ::Type{diff_type}, dir, colorvec, - sparsity, jac_prototype) where {diff_type} - f_in = diff_type === Val{:forward} ? f(x) : similar(x) - ret_eltype = eltype(f_in) - J = FiniteDiff.finite_difference_jacobian(f, x, diff_type, ret_eltype, f_in, - dir = dir, colorvec = colorvec, - sparsity = sparsity, - jac_prototype = jac_prototype) - return J, _nfcount(maximum(colorvec), diff_type) -end -function jacobian(cache, f::F) where {F} - x = cache.u - alg = cache.alg - uf = cache.uf - local tmp - - if DiffEqBase.has_jac(cache.f) - J = f.jac(cache.u, cache.p) - elseif alg_autodiff(alg) - J, tmp = jacobian_autodiff(uf, x, cache.f, alg) - else - jac_prototype = cache.f.jac_prototype - sparsity, colorvec = sparsity_colorvec(cache.f, x) - dir = true - J, tmp = jacobian_finitediff(uf, x, alg_difftype(alg), dir, colorvec, sparsity, - jac_prototype) - end - J -end - -jacobian_autodiff(f, x, nonlinfun, alg) = (ForwardDiff.derivative(f, x), 1, alg) -function jacobian_autodiff(f, x::AbstractArray, nonlinfun, alg) - jac_prototype = nonlinfun.jac_prototype - sparsity, colorvec = sparsity_colorvec(nonlinfun, x) - maxcolor = maximum(colorvec) - chunk_size = get_chunksize(alg) === Val(0) ? nothing : get_chunksize(alg) - num_of_chunks = chunk_size === nothing ? 
- Int(ceil(maxcolor / - SparseDiffTools.getsize(ForwardDiff.pickchunksize(maxcolor)))) : - Int(ceil(maxcolor / _unwrap_val(chunk_size))) - (forwarddiff_color_jacobian(f, x, colorvec = colorvec, sparsity = sparsity, - jac_prototype = jac_prototype, chunksize = chunk_size), - num_of_chunks) + return uf, linsolve, J, fu, jac_cache end diff --git a/src/levenberg.jl b/src/levenberg.jl index db8955f4a..721e08cd3 100644 --- a/src/levenberg.jl +++ b/src/levenberg.jl @@ -1,113 +1,82 @@ """ -```julia -LevenbergMarquardt(; chunk_size = Val{0}(), - autodiff = Val{true}(), - standardtag = Val{true}(), - concrete_jac = nothing, - diff_type = Val{:forward}, - linsolve = nothing, precs = DEFAULT_PRECS, - damping_initial::Real = 1.0, - damping_increase_factor::Real = 2.0, - damping_decrease_factor::Real = 3.0, - finite_diff_step_geodesic::Real = 0.1, - α_geodesic::Real = 0.75, - b_uphill::Real = 1.0, - min_damping_D::AbstractFloat = 1e-8) -``` + LevenbergMarquardt(; concrete_jac = nothing, linsolve = nothing, + precs = DEFAULT_PRECS, damping_initial::Real = 1.0, + damping_increase_factor::Real = 2.0, damping_decrease_factor::Real = 3.0, + finite_diff_step_geodesic::Real = 0.1, α_geodesic::Real = 0.75, + b_uphill::Real = 1.0, min_damping_D::AbstractFloat = 1e-8, adkwargs...) An advanced Levenberg-Marquardt implementation with the improvements suggested in the [paper](https://arxiv.org/abs/1201.5885) "Improvements to the Levenberg-Marquardt algorithm for nonlinear least-squares minimization". Designed for large-scale and numerically-difficult nonlinear systems. - ### Keyword Arguments -- `chunk_size`: the chunk size used by the internal ForwardDiff.jl automatic differentiation - system. This allows for multiple derivative columns to be computed simultaneously, - improving performance. Defaults to `0`, which is equivalent to using ForwardDiff.jl's - default chunk size mechanism. For more details, see the documentation for - [ForwardDiff.jl](https://juliadiff.org/ForwardDiff.jl/stable/). -- `autodiff`: whether to use forward-mode automatic differentiation for the Jacobian. - Note that this argument is ignored if an analytical Jacobian is passed, as that will be - used instead. Defaults to `Val{true}`, which means ForwardDiff.jl via - SparseDiffTools.jl is used by default. If `Val{false}`, then FiniteDiff.jl is used for - finite differencing. -- `standardtag`: whether to use a standardized tag definition for the purposes of automatic - differentiation. Defaults to true, which thus uses the `NonlinearSolveTag`. If `Val{false}`, - then ForwardDiff's default function naming tag is used, which results in larger stack - traces. -- `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used, - then the Jacobian will not be constructed and instead direct Jacobian-vector products - `J*v` are computed using forward-mode automatic differentiation or finite differencing - tricks (without ever constructing the Jacobian). However, if the Jacobian is still needed, - for example for a preconditioner, `concrete_jac = true` can be passed in order to force - the construction of the Jacobian. -- `diff_type`: the type of finite differencing used if `autodiff = false`. Defaults to - `Val{:forward}` for forward finite differences. For more details on the choices, see the - [FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) documentation. -- `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the - linear solves within the Newton method. 
Defaults to `nothing`, which means it uses the - LinearSolve.jl default algorithm choice. For more information on available algorithm - choices, see the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). -- `precs`: the choice of preconditioners for the linear solver. Defaults to using no - preconditioners. For more information on specifying preconditioners for LinearSolve - algorithms, consult the - [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). -- `damping_initial`: the starting value for the damping factor. The damping factor is - inversely proportional to the step size. The damping factor is adjusted during each - iteration. Defaults to `1.0`. For more details, see section 2.1 of - [this paper](https://arxiv.org/abs/1201.5885). -- `damping_increase_factor`: the factor by which the damping is increased if a step is - rejected. Defaults to `2.0`. For more details, see section 2.1 of - [this paper](https://arxiv.org/abs/1201.5885). -- `damping_decrease_factor`: the factor by which the damping is decreased if a step is - accepted. Defaults to `3.0`. For more details, see section 2.1 of - [this paper](https://arxiv.org/abs/1201.5885). -- `finite_diff_step_geodesic`: the step size used for finite differencing used to calculate - the geodesic acceleration. Defaults to `0.1` which means that the step size is - approximately 10% of the first-order step. For more details, see section 3 of - [this paper](https://arxiv.org/abs/1201.5885). -- `α_geodesic`: a factor that determines if a step is accepted or rejected. To incorporate - geodesic acceleration as an addition to the Levenberg-Marquardt algorithm, it is necessary - that acceptable steps meet the condition - ``\\frac{2||a||}{||v||} \\le \\alpha_{\\text{geodesic}}``, where ``a`` is the geodesic - acceleration, ``v`` is the Levenberg-Marquardt algorithm's step (velocity along a geodesic - path) and `α_geodesic` is some number of order `1`. For most problems `α_geodesic = 0.75` - is a good value but for problems where convergence is difficult `α_geodesic = 0.1` is an - effective choice. Defaults to `0.75`. For more details, see section 3, equation (15) of - [this paper](https://arxiv.org/abs/1201.5885). -- `b_uphill`: a factor that determines if a step is accepted or rejected. The standard - choice in the Levenberg-Marquardt method is to accept all steps that decrease the cost - and reject all steps that increase the cost. Although this is a natural and safe choice, - it is often not the most efficient. Therefore downhill moves are always accepted, but - uphill moves are only conditionally accepted. To decide whether an uphill move will be - accepted at each iteration ``i``, we compute - ``\\beta_i = \\cos(v_{\\text{new}}, v_{\\text{old}})``, which denotes the cosine angle - between the proposed velocity ``v_{\\text{new}}`` and the velocity of the last accepted - step ``v_{\\text{old}}``. The idea is to accept uphill moves if the angle is small. To - specify, uphill moves are accepted if - ``(1-\\beta_i)^{b_{\\text{uphill}}} C_{i+1} \\le C_i``, where ``C_i`` is the cost at - iteration ``i``. Reasonable choices for `b_uphill` are `1.0` or `2.0`, with `b_uphill=2.0` - allowing higher uphill moves than `b_uphill=1.0`. When `b_uphill=0.0`, no uphill moves - will be accepted. Defaults to `1.0`. For more details, see section 4 of - [this paper](https://arxiv.org/abs/1201.5885). 
-- `min_damping_D`: the minimum value of the damping terms in the diagonal damping matrix - `DᵀD`, where `DᵀD` is given by the largest diagonal entries of `JᵀJ` yet encountered, - where `J` is the Jacobian. It is suggested by - [this paper](https://arxiv.org/abs/1201.5885) to use a minimum value of the elements in - `DᵀD` to prevent the damping from being too small. Defaults to `1e-8`. - - -!!! note - - Currently, the linear solver and chunk size choice only applies to in-place defined - `NonlinearProblem`s. That is expected to change in the future. + - `autodiff`: determines the backend used for the Jacobian. Note that this argument is + ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to + `AutoForwardDiff()`. Valid choices are types from ADTypes.jl. + - `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used, + then the Jacobian will not be constructed and instead direct Jacobian-vector products + `J*v` are computed using forward-mode automatic differentiation or finite differencing + tricks (without ever constructing the Jacobian). However, if the Jacobian is still needed, + for example for a preconditioner, `concrete_jac = true` can be passed in order to force + the construction of the Jacobian. + - `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the + linear solves within the Newton method. Defaults to `nothing`, which means it uses the + LinearSolve.jl default algorithm choice. For more information on available algorithm + choices, see the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). + - `precs`: the choice of preconditioners for the linear solver. Defaults to using no + preconditioners. For more information on specifying preconditioners for LinearSolve + algorithms, consult the + [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). + - `damping_initial`: the starting value for the damping factor. The damping factor is + inversely proportional to the step size. The damping factor is adjusted during each + iteration. Defaults to `1.0`. For more details, see section 2.1 of + [this paper](https://arxiv.org/abs/1201.5885). + - `damping_increase_factor`: the factor by which the damping is increased if a step is + rejected. Defaults to `2.0`. For more details, see section 2.1 of + [this paper](https://arxiv.org/abs/1201.5885). + - `damping_decrease_factor`: the factor by which the damping is decreased if a step is + accepted. Defaults to `3.0`. For more details, see section 2.1 of + [this paper](https://arxiv.org/abs/1201.5885). + - `finite_diff_step_geodesic`: the step size used for finite differencing used to calculate + the geodesic acceleration. Defaults to `0.1` which means that the step size is + approximately 10% of the first-order step. For more details, see section 3 of + [this paper](https://arxiv.org/abs/1201.5885). + - `α_geodesic`: a factor that determines if a step is accepted or rejected. To incorporate + geodesic acceleration as an addition to the Levenberg-Marquardt algorithm, it is necessary + that acceptable steps meet the condition + ``\\frac{2||a||}{||v||} \\le \\alpha_{\\text{geodesic}}``, where ``a`` is the geodesic + acceleration, ``v`` is the Levenberg-Marquardt algorithm's step (velocity along a geodesic + path) and `α_geodesic` is some number of order `1`. For most problems `α_geodesic = 0.75` + is a good value but for problems where convergence is difficult `α_geodesic = 0.1` is an + effective choice. 
Defaults to `0.75`. For more details, see section 3, equation (15) of + [this paper](https://arxiv.org/abs/1201.5885). + - `b_uphill`: a factor that determines if a step is accepted or rejected. The standard + choice in the Levenberg-Marquardt method is to accept all steps that decrease the cost + and reject all steps that increase the cost. Although this is a natural and safe choice, + it is often not the most efficient. Therefore downhill moves are always accepted, but + uphill moves are only conditionally accepted. To decide whether an uphill move will be + accepted at each iteration ``i``, we compute + ``\\beta_i = \\cos(v_{\\text{new}}, v_{\\text{old}})``, which denotes the cosine angle + between the proposed velocity ``v_{\\text{new}}`` and the velocity of the last accepted + step ``v_{\\text{old}}``. The idea is to accept uphill moves if the angle is small. To + specify, uphill moves are accepted if + ``(1-\\beta_i)^{b_{\\text{uphill}}} C_{i+1} \\le C_i``, where ``C_i`` is the cost at + iteration ``i``. Reasonable choices for `b_uphill` are `1.0` or `2.0`, with `b_uphill=2.0` + allowing higher uphill moves than `b_uphill=1.0`. When `b_uphill=0.0`, no uphill moves + will be accepted. Defaults to `1.0`. For more details, see section 4 of + [this paper](https://arxiv.org/abs/1201.5885). + - `min_damping_D`: the minimum value of the damping terms in the diagonal damping matrix + `DᵀD`, where `DᵀD` is given by the largest diagonal entries of `JᵀJ` yet encountered, + where `J` is the Jacobian. It is suggested by + [this paper](https://arxiv.org/abs/1201.5885) to use a minimum value of the elements in + `DᵀD` to prevent the damping from being too small. Defaults to `1e-8`. """ -struct LevenbergMarquardt{CS, AD, FDT, L, P, ST, CJ, T} <: - AbstractNewtonAlgorithm{CS, AD, FDT, ST, CJ} - linsolve::L - precs::P +@concrete struct LevenbergMarquardt{CJ, AD, T} <: AbstractNewtonAlgorithm{CJ, AD} + ad::AD + linsolve + precs damping_initial::T damping_increase_factor::T damping_decrease_factor::T @@ -117,54 +86,36 @@ struct LevenbergMarquardt{CS, AD, FDT, L, P, ST, CJ, T} <: min_damping_D::T end -function LevenbergMarquardt(; chunk_size = Val{0}(), - autodiff = Val{true}(), - standardtag = Val{true}(), - concrete_jac = nothing, - diff_type = Val{:forward}, - linsolve = nothing, - precs = DEFAULT_PRECS, - damping_initial::Real = 1.0, - damping_increase_factor::Real = 2.0, - damping_decrease_factor::Real = 3.0, - finite_diff_step_geodesic::Real = 0.1, - α_geodesic::Real = 0.75, - b_uphill::Real = 1.0, - min_damping_D::AbstractFloat = 1e-8) - LevenbergMarquardt{_unwrap_val(chunk_size), _unwrap_val(autodiff), diff_type, - typeof(linsolve), typeof(precs), _unwrap_val(standardtag), - _unwrap_val(concrete_jac), - typeof(min_damping_D)}(linsolve, precs, - damping_initial, - damping_increase_factor, - damping_decrease_factor, - finite_diff_step_geodesic, - α_geodesic, - b_uphill, - min_damping_D) +function LevenbergMarquardt(; concrete_jac = nothing, linsolve = nothing, + precs = DEFAULT_PRECS, damping_initial::Real = 1.0, damping_increase_factor::Real = 2.0, + damping_decrease_factor::Real = 3.0, finite_diff_step_geodesic::Real = 0.1, + α_geodesic::Real = 0.75, b_uphill::Real = 1.0, min_damping_D::AbstractFloat = 1e-8, + adkwargs...) + ad = default_adargs_to_adtype(adkwargs...) 
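    # Illustrative sketch (not part of this patch): `default_adargs_to_adtype` is assumed to
    # be defined elsewhere in the package and to map `autodiff = <ADTypes backend>` (default
    # `AutoForwardDiff()`) onto the `ad` field, e.g. a hypothetical call:
    #   LevenbergMarquardt(; autodiff = AutoFiniteDiff(), damping_initial = 2.0)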
+ return LevenbergMarquardt{_unwrap_val(concrete_jac)}(ad, linsolve, precs, + damping_initial, damping_increase_factor, damping_decrease_factor, + finite_diff_step_geodesic, α_geodesic, b_uphill, min_damping_D) end -mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resType, pType, - INType, tolType, probType, ufType, L, jType, JC, - DᵀDType, λType, lossType, -} - f::fType - alg::algType +@concrete mutable struct LevenbergMarquardtCache{iip, uType, jType, λType, lossType} + f + alg u::uType - fu::resType - p::pType - uf::ufType - linsolve::L + fu1 + fu2 + du + p + uf + linsolve J::jType - du_tmp::duType - jac_config::JC + jac_cache force_stop::Bool maxiters::Int - internalnorm::INType - retcode::SciMLBase.ReturnCode.T - abstol::tolType - prob::probType - DᵀD::DᵀDType + internalnorm + retcode::ReturnCode.T + abstol + prob + DᵀD JᵀJ::jType λ::λType λ_factor::λType @@ -182,75 +133,25 @@ mutable struct LevenbergMarquardtCache{iip, fType, algType, uType, duType, resTy δ::uType loss_old::lossType make_new_J::Bool - fu_tmp::resType + fu_tmp mat_tmp::jType stats::NLStats - - function LevenbergMarquardtCache{iip}(f::fType, alg::algType, u::uType, fu::resType, - p::pType, uf::ufType, linsolve::L, J::jType, - du_tmp::duType, jac_config::JC, - force_stop::Bool, maxiters::Int, - internalnorm::INType, - retcode::SciMLBase.ReturnCode.T, abstol::tolType, - prob::probType, DᵀD::DᵀDType, JᵀJ::jType, - λ::λType, λ_factor::λType, - damping_increase_factor::λType, - damping_decrease_factor::λType, h::λType, - α_geodesic::λType, b_uphill::λType, - min_damping_D::λType, v::uType, - a::uType, tmp_vec::uType, v_old::uType, - norm_v_old::lossType, δ::uType, - loss_old::lossType, make_new_J::Bool, - fu_tmp::resType, - mat_tmp::jType, - stats::NLStats) where { - iip, fType, algType, - uType, duType, resType, - pType, INType, tolType, - probType, ufType, L, - jType, JC, DᵀDType, - λType, lossType, - } - new{iip, fType, algType, uType, duType, resType, - pType, INType, tolType, probType, ufType, L, - jType, JC, DᵀDType, λType, lossType}(f, alg, u, fu, p, uf, linsolve, J, du_tmp, - jac_config, force_stop, maxiters, - internalnorm, retcode, abstol, prob, DᵀD, - JᵀJ, λ, λ_factor, - damping_increase_factor, - damping_decrease_factor, h, - α_geodesic, b_uphill, min_damping_D, - v, a, tmp_vec, v_old, - norm_v_old, δ, loss_old, make_new_J, - fu_tmp, mat_tmp, stats) - end end -function jacobian_caches(alg::LevenbergMarquardt, f, u, p, ::Val{false}) - JacobianWrapper(f, p), nothing, ArrayInterface.undefmatrix(u), nothing, nothing -end +isinplace(::LevenbergMarquardtCache{iip}) where {iip} = iip function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LevenbergMarquardt, - args...; - alias_u0 = false, - maxiters = 1000, - abstol = 1e-6, - internalnorm = DEFAULT_NORM, + args...; alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM, kwargs...) where {uType, iip} - if alias_u0 - u = prob.u0 - else - u = deepcopy(prob.u0) - end - f = prob.f - p = prob.p + @unpack f, u0, p = prob + u = alias_u0 ? 
u0 : deepcopy(u0) if iip - fu = zero(u) - f(fu, u, p) + fu1 = zero(u) # TODO: Use Prototype + f(fu1, u, p) else - fu = f(u, p) + fu1 = f(u, p) end - uf, linsolve, J, du_tmp, jac_config = jacobian_caches(alg, f, u, p, Val(iip)) + uf, linsolve, J, fu2, jac_cache = jacobian_caches(alg, f, u, p, Val(iip)) λ = convert(eltype(u), alg.damping_initial) λ_factor = convert(eltype(u), alg.damping_increase_factor) @@ -269,7 +170,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LevenbergMarq DᵀD = Diagonal(d) end - loss = internalnorm(fu) + loss = internalnorm(fu1) JᵀJ = zero(J) v = zero(u) a = zero(u) @@ -277,26 +178,25 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LevenbergMarq v_old = zero(u) δ = zero(u) make_new_J = true - fu_tmp = zero(fu) + fu_tmp = zero(fu1) mat_tmp = zero(J) - return LevenbergMarquardtCache{iip}(f, alg, u, fu, p, uf, linsolve, J, du_tmp, - jac_config, false, maxiters, internalnorm, - ReturnCode.Default, abstol, prob, DᵀD, JᵀJ, - λ, λ_factor, damping_increase_factor, - damping_decrease_factor, h, - α_geodesic, b_uphill, min_damping_D, - v, a, tmp_vec, v_old, loss, δ, loss, make_new_J, - fu_tmp, mat_tmp, NLStats(1, 0, 0, 0, 0)) + return LevenbergMarquardtCache{iip}(f, alg, u, fu1, fu2, zero(u), p, uf, linsolve, J, + jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob, DᵀD, + JᵀJ, λ, λ_factor, damping_increase_factor, damping_decrease_factor, h, α_geodesic, + b_uphill, min_damping_D, v, a, tmp_vec, v_old, loss, δ, loss, make_new_J, fu_tmp, + mat_tmp, NLStats(1, 0, 0, 0, 0)) end + function perform_step!(cache::LevenbergMarquardtCache{true}) - @unpack fu, f, make_new_J = cache - if iszero(fu) + @unpack fu1, f, make_new_J = cache + if iszero(fu1) cache.force_stop = true return nothing end + if make_new_J - jacobian!(cache.J, cache) + jacobian!!(cache.J, cache) mul!(cache.JᵀJ, cache.J', cache.J) cache.DᵀD .= max.(cache.DᵀD, Diagonal(cache.JᵀJ)) cache.make_new_J = false @@ -306,24 +206,24 @@ function perform_step!(cache::LevenbergMarquardtCache{true}) # Usual Levenberg-Marquardt step ("velocity"). # The following lines do: cache.v = -cache.mat_tmp \ cache.fu_tmp - mul!(cache.fu_tmp, J', fu) + mul!(cache.fu_tmp, J', fu1) @. cache.mat_tmp = JᵀJ + λ * DᵀD linres = dolinsolve(alg.precs, linsolve, A = cache.mat_tmp, b = _vec(cache.fu_tmp), - linu = _vec(cache.du_tmp), p = p, reltol = cache.abstol) + linu = _vec(cache.du), p = p, reltol = cache.abstol) cache.linsolve = linres.cache - @. cache.v = -cache.du_tmp + @. cache.v = -cache.du # Geodesic acceleration (step_size = v + a / 2). @unpack v, α_geodesic, h = cache f(cache.fu_tmp, u .+ h .* v, p) # The following lines do: cache.a = -J \ cache.fu_tmp - mul!(cache.du_tmp, J, v) - @. cache.fu_tmp = (2 / h) * ((cache.fu_tmp - fu) / h - cache.du_tmp) + mul!(cache.du, J, v) + @. cache.fu_tmp = (2 / h) * ((cache.fu_tmp - fu1) / h - cache.du) linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(cache.fu_tmp), - linu = _vec(cache.du_tmp), p = p, reltol = cache.abstol) + linu = _vec(cache.du), p = p, reltol = cache.abstol) cache.linsolve = linres.cache - @. cache.a = -cache.du_tmp + @. 
cache.a = -cache.du cache.stats.nsolve += 2 cache.stats.nfactors += 2 @@ -345,7 +245,7 @@ function perform_step!(cache::LevenbergMarquardtCache{true}) cache.force_stop = true return nothing end - cache.fu .= cache.fu_tmp + cache.fu1 .= cache.fu_tmp cache.v_old .= v cache.norm_v_old = norm_v cache.loss_old = loss @@ -359,13 +259,14 @@ function perform_step!(cache::LevenbergMarquardtCache{true}) end function perform_step!(cache::LevenbergMarquardtCache{false}) - @unpack fu, f, make_new_J = cache - if iszero(fu) + @unpack fu1, f, make_new_J = cache + if iszero(fu1) cache.force_stop = true return nothing end + if make_new_J - cache.J = jacobian(cache, f) + cache.J = jacobian!!(cache.J, cache) cache.JᵀJ = cache.J' * cache.J if cache.JᵀJ isa Number cache.DᵀD = max(cache.DᵀD, cache.JᵀJ) @@ -378,11 +279,11 @@ function perform_step!(cache::LevenbergMarquardtCache{false}) @unpack u, p, λ, JᵀJ, DᵀD, J = cache # Usual Levenberg-Marquardt step ("velocity"). - cache.v = -(JᵀJ + λ * DᵀD) \ (J' * fu) + cache.v = -(JᵀJ + λ * DᵀD) \ (J' * fu1) @unpack v, h, α_geodesic = cache # Geodesic acceleration (step_size = v + a / 2). - cache.a = -J \ ((2 / h) .* ((f(u .+ h .* v, p) .- fu) ./ h .- J * v)) + cache.a = -J \ ((2 / h) .* ((f(u .+ h .* v, p) .- fu1) ./ h .- J * v)) cache.stats.nsolve += 1 cache.stats.nfactors += 1 @@ -404,7 +305,7 @@ function perform_step!(cache::LevenbergMarquardtCache{false}) cache.force_stop = true return nothing end - cache.fu = fu_new + cache.fu1 = fu_new cache.v_old = v cache.norm_v_old = norm_v cache.loss_old = loss @@ -429,6 +330,6 @@ function SciMLBase.solve!(cache::LevenbergMarquardtCache) cache.retcode = ReturnCode.Success end - SciMLBase.build_solution(cache.prob, cache.alg, cache.u, cache.fu; - retcode = cache.retcode, stats = cache.stats) + return SciMLBase.build_solution(cache.prob, cache.alg, cache.u, cache.fu1; + cache.retcode, cache.stats) end diff --git a/src/raphson.jl b/src/raphson.jl index 24e5799fd..d780d5077 100644 --- a/src/raphson.jl +++ b/src/raphson.jl @@ -1,9 +1,6 @@ """ -```julia -NewtonRaphson(; chunk_size = Val{0}(), autodiff = Val{true}(), - standardtag = Val{true}(), concrete_jac = nothing, - diff_type = Val{:forward}, linsolve = nothing, precs = DEFAULT_PRECS) -``` + NewtonRaphson(; concrete_jac = nothing, linsolve = nothing, + precs = DEFAULT_PRECS, adkwargs...) An advanced NewtonRaphson implementation with support for efficient handling of sparse matrices via colored automatic differentiation and preconditioned linear solvers. Designed @@ -11,29 +8,15 @@ for large-scale and numerically-difficult nonlinear systems. ### Keyword Arguments - - `chunk_size`: the chunk size used by the internal ForwardDiff.jl automatic differentiation - system. This allows for multiple derivative columns to be computed simultaneously, - improving performance. Defaults to `0`, which is equivalent to using ForwardDiff.jl's - default chunk size mechanism. For more details, see the documentation for - [ForwardDiff.jl](https://juliadiff.org/ForwardDiff.jl/stable/). - - `autodiff`: whether to use forward-mode automatic differentiation for the Jacobian. - Note that this argument is ignored if an analytical Jacobian is passed, as that will be - used instead. Defaults to `Val{true}`, which means ForwardDiff.jl via - SparseDiffTools.jl is used by default. If `Val{false}`, then FiniteDiff.jl is used for - finite differencing. - - `standardtag`: whether to use a standardized tag definition for the purposes of automatic - differentiation. 
Defaults to true, which thus uses the `NonlinearSolveTag`. If `Val{false}`, - then ForwardDiff's default function naming tag is used, which results in larger stack - traces. + - `autodiff`: determines the backend used for the Jacobian. Note that this argument is + ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to + `AutoForwardDiff()`. Valid choices are types from ADTypes.jl. - `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used, then the Jacobian will not be constructed and instead direct Jacobian-vector products `J*v` are computed using forward-mode automatic differentiation or finite differencing tricks (without ever constructing the Jacobian). However, if the Jacobian is still needed, for example for a preconditioner, `concrete_jac = true` can be passed in order to force the construction of the Jacobian. - - `diff_type`: the type of finite differencing used if `autodiff = false`. Defaults to - `Val{:forward}` for forward finite differences. For more details on the choices, see the - [FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) documentation. - `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the linear solves within the Newton method. Defaults to `nothing`, which means it uses the LinearSolve.jl default algorithm choice. For more information on available algorithm @@ -42,114 +25,74 @@ for large-scale and numerically-difficult nonlinear systems. preconditioners. For more information on specifying preconditioners for LinearSolve algorithms, consult the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). - -!!! note - - Currently, the linear solver and chunk size choice only applies to in-place defined - `NonlinearProblem`s. That is expected to change in the future. """ -struct NewtonRaphson{CS, AD, FDT, L, P, ST, CJ} <: - AbstractNewtonAlgorithm{CS, AD, FDT, ST, CJ} - linsolve::L - precs::P +@concrete struct NewtonRaphson{CJ, AD} <: AbstractNewtonAlgorithm{CJ, AD} + ad::AD + linsolve + precs end -function NewtonRaphson(; chunk_size = Val{0}(), autodiff = Val{true}(), - standardtag = Val{true}(), concrete_jac = nothing, - diff_type = Val{:forward}, linsolve = nothing, precs = DEFAULT_PRECS) - NewtonRaphson{_unwrap_val(chunk_size), _unwrap_val(autodiff), diff_type, - typeof(linsolve), typeof(precs), _unwrap_val(standardtag), - _unwrap_val(concrete_jac)}(linsolve, - precs) +concrete_jac(::NewtonRaphson{CJ}) where {CJ} = CJ + +function NewtonRaphson(; concrete_jac = nothing, linsolve = nothing, + precs = DEFAULT_PRECS, adkwargs...) + ad = default_adargs_to_adtype(adkwargs...) 
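    # Illustrative sketch (not part of this patch): the old `chunk_size`/`autodiff`/`diff_type`
    # flags collapse into a single ADTypes backend passed through `adkwargs...`, e.g.
    # hypothetical calls (`KrylovJL_GMRES` comes from LinearSolve.jl):
    #   NewtonRaphson(; autodiff = AutoForwardDiff())
    #   NewtonRaphson(; autodiff = AutoFiniteDiff(), linsolve = KrylovJL_GMRES(),
    #       concrete_jac = true)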
+ return NewtonRaphson{_unwrap_val(concrete_jac)}(ad, linsolve, precs) end -mutable struct NewtonRaphsonCache{iip, fType, algType, uType, duType, resType, pType, - INType, tolType, - probType, ufType, L, jType, JC} - f::fType - alg::algType - u::uType - fu::resType - p::pType - uf::ufType - linsolve::L - J::jType - du1::duType - jac_config::JC - force_stop::Bool +@concrete mutable struct NewtonRaphsonCache{iip} + f + alg + u + fu1 + fu2 + du + p + uf + linsolve + J + jac_cache + force_stop maxiters::Int - internalnorm::INType - retcode::SciMLBase.ReturnCode.T - abstol::tolType - prob::probType + internalnorm + retcode::ReturnCode.T + abstol + prob stats::NLStats - - function NewtonRaphsonCache{iip}(f::fType, alg::algType, u::uType, fu::resType, - p::pType, uf::ufType, linsolve::L, J::jType, - du1::duType, - jac_config::JC, force_stop::Bool, maxiters::Int, - internalnorm::INType, - retcode::SciMLBase.ReturnCode.T, abstol::tolType, - prob::probType, - stats::NLStats) where { - iip, fType, algType, uType, - duType, resType, pType, INType, - tolType, - probType, ufType, L, jType, JC} - new{iip, fType, algType, uType, duType, resType, pType, INType, tolType, - probType, ufType, L, jType, JC}(f, alg, u, fu, p, - uf, linsolve, J, du1, jac_config, - force_stop, maxiters, internalnorm, - retcode, abstol, prob, stats) - end end -function jacobian_caches(alg::NewtonRaphson, f, u, p, ::Val{false}) - JacobianWrapper(f, p), nothing, nothing, nothing, nothing -end +isinplace(::NewtonRaphsonCache{iip}) where {iip} = iip -function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::NewtonRaphson, - args...; - alias_u0 = false, - maxiters = 1000, - abstol = 1e-6, - internalnorm = DEFAULT_NORM, +function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::NewtonRaphson, args...; + alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM, kwargs...) where {uType, iip} - if alias_u0 - u = prob.u0 - else - u = deepcopy(prob.u0) - end - f = prob.f - p = prob.p + @unpack f, u0, p = prob + u = alias_u0 ? u0 : deepcopy(u0) if iip - fu = zero(u) - f(fu, u, p) + fu1 = zero(u) # TODO: Use Prototype + f(fu1, u, p) else - fu = f(u, p) + fu1 = f(u, p) end - uf, linsolve, J, du1, jac_config = jacobian_caches(alg, f, u, p, Val(iip)) + uf, linsolve, J, fu2, jac_cache = jacobian_caches(alg, f, u, p, Val(iip)) - return NewtonRaphsonCache{iip}(f, alg, u, fu, p, uf, linsolve, J, du1, jac_config, - false, maxiters, internalnorm, - ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0)) + return NewtonRaphsonCache{iip}(f, alg, u, fu1, fu2, zero(u), p, uf, linsolve, J, + jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob, + NLStats(1, 0, 0, 0, 0)) end function perform_step!(cache::NewtonRaphsonCache{true}) - @unpack u, fu, f, p, alg = cache - @unpack J, linsolve, du1 = cache - jacobian!(J, cache) + @unpack u, fu1, f, p, alg, J, linsolve, du = cache + jacobian!!(J, cache) # u = u - J \ fu - linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(fu), linu = _vec(du1), - p = p, reltol = cache.abstol) + linres = dolinsolve(alg.precs, linsolve; A = J, b = _vec(fu1), linu = _vec(du), + p, reltol = cache.abstol) cache.linsolve = linres.cache - @. u = u - du1 - f(fu, u, p) + @. 
u = u - du + f(fu1, u, p) - if cache.internalnorm(cache.fu) < cache.abstol - cache.force_stop = true - end + cache.internalnorm(fu1) < cache.abstol && (cache.force_stop = true) cache.stats.nf += 1 cache.stats.njacs += 1 cache.stats.nsolve += 1 @@ -158,13 +101,17 @@ function perform_step!(cache::NewtonRaphsonCache{true}) end function perform_step!(cache::NewtonRaphsonCache{false}) - @unpack u, fu, f, p = cache - J = jacobian(cache, f) - cache.u = u - J \ fu - cache.fu = f(cache.u, p) - if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol - cache.force_stop = true - end + @unpack u, fu1, f, p, alg, linsolve, du = cache + + cache.J = jacobian!!(cache.J, cache) + # u = u - J \ fu + linres = dolinsolve(alg.precs, linsolve; A = cache.J, b = _vec(fu1), linu = _vec(du), + p, reltol = cache.abstol) + cache.linsolve = linres.cache + @. u = u - du + cache.fu1 = f(u, p) + + cache.internalnorm(fu1) < cache.abstol && (cache.force_stop = true) cache.stats.nf += 1 cache.stats.njacs += 1 cache.stats.nsolve += 1 @@ -184,8 +131,8 @@ function SciMLBase.solve!(cache::NewtonRaphsonCache) cache.retcode = ReturnCode.Success end - SciMLBase.build_solution(cache.prob, cache.alg, cache.u, cache.fu; - retcode = cache.retcode, stats = cache.stats) + return SciMLBase.build_solution(cache.prob, cache.alg, cache.u, cache.fu1; + cache.retcode, cache.stats) end function SciMLBase.reinit!(cache::NewtonRaphsonCache{iip}, u0 = cache.u; p = cache.p, @@ -193,11 +140,11 @@ function SciMLBase.reinit!(cache::NewtonRaphsonCache{iip}, u0 = cache.u; p = cac cache.p = p if iip recursivecopy!(cache.u, u0) - cache.f(cache.fu, cache.u, p) + cache.f(cache.fu1, cache.u, p) else # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter cache.u = u0 - cache.fu = cache.f(cache.u, p) + cache.fu1 = cache.f(cache.u, p) end cache.abstol = abstol cache.maxiters = maxiters diff --git a/src/trustRegion.jl b/src/trustRegion.jl index 6e867699c..c43b86699 100644 --- a/src/trustRegion.jl +++ b/src/trustRegion.jl @@ -14,7 +14,7 @@ states as `RadiusUpdateSchemes.T`. Simply put the desired scheme as follows: `TrustRegion(radius_update_scheme = your desired update scheme)`. For example, `sol = solve(prob, alg=TrustRegion(radius_update_scheme = RadiusUpdateSchemes.Hei))`. """ -EnumX.@enumx RadiusUpdateSchemes begin +@enumx RadiusUpdateSchemes begin """ `RadiusUpdateSchemes.Simple` @@ -68,19 +68,12 @@ end """ ```julia -TrustRegion(; chunk_size = Val{0}(), autodiff = Val{true}(), - standardtag = Val{true}(), concrete_jac = nothing, - diff_type = Val{:forward}, linsolve = nothing, precs = DEFAULT_PRECS, - radius_update_scheme = RadiusUpdateSchemes.Simple, - max_trust_radius::Real = 0 // 1, - initial_trust_radius::Real = 0 // 1, - step_threshold::Real = 1 // 10, - shrink_threshold::Real = 1 // 4, - expand_threshold::Real = 3 // 4, - shrink_factor::Real = 1 // 4, - expand_factor::Real = 2 // 1, - max_shrink_times::Int = 32) -``` + TrustRegion(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS, + radius_update_scheme::RadiusUpdateSchemes.T = RadiusUpdateSchemes.Simple, + max_trust_radius::Real = 0 // 1, initial_trust_radius::Real = 0 // 1, + step_threshold::Real = 1 // 10, shrink_threshold::Real = 1 // 4, + expand_threshold::Real = 3 // 4, shrink_factor::Real = 1 // 4, + expand_factor::Real = 2 // 1, max_shrink_times::Int = 32, adkwargs...) 
An advanced TrustRegion implementation with support for efficient handling of sparse matrices via colored automatic differentiation and preconditioned linear solvers. Designed @@ -88,29 +81,15 @@ for large-scale and numerically-difficult nonlinear systems. ### Keyword Arguments - - `chunk_size`: the chunk size used by the internal ForwardDiff.jl automatic differentiation - system. This allows for multiple derivative columns to be computed simultaneously, - improving performance. Defaults to `0`, which is equivalent to using ForwardDiff.jl's - default chunk size mechanism. For more details, see the documentation for - [ForwardDiff.jl](https://juliadiff.org/ForwardDiff.jl/stable/). - - `autodiff`: whether to use forward-mode automatic differentiation for the Jacobian. - Note that this argument is ignored if an analytical Jacobian is passed, as that will be - used instead. Defaults to `Val{true}`, which means ForwardDiff.jl via - SparseDiffTools.jl is used by default. If `Val{false}`, then FiniteDiff.jl is used for - finite differencing. - - `standardtag`: whether to use a standardized tag definition for the purposes of automatic - differentiation. Defaults to true, which thus uses the `NonlinearSolveTag`. If `Val{false}`, - then ForwardDiff's default function naming tag is used, which results in larger stack - traces. + - `autodiff`: determines the backend used for the Jacobian. Note that this argument is + ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to + `AutoForwardDiff()`. Valid choices are types from ADTypes.jl. - `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used, then the Jacobian will not be constructed and instead direct Jacobian-vector products `J*v` are computed using forward-mode automatic differentiation or finite differencing tricks (without ever constructing the Jacobian). However, if the Jacobian is still needed, for example for a preconditioner, `concrete_jac = true` can be passed in order to force the construction of the Jacobian. - - `diff_type`: the type of finite differencing used if `autodiff = false`. Defaults to - `Val{:forward}` for forward finite differences. For more details on the choices, see the - [FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) documentation. - `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the linear solves within the Newton method. Defaults to `nothing`, which means it uses the LinearSolve.jl default algorithm choice. For more information on available algorithm @@ -148,18 +127,13 @@ for large-scale and numerically-difficult nonlinear systems. `expand_threshold < r` (with `r` defined in `shrink_threshold`). Defaults to `2.0`. - `max_shrink_times`: the maximum number of times to shrink the trust region radius in a row, `max_shrink_times` is exceeded, the algorithm returns. Defaults to `32`. - -!!! note - - Currently, the linear solver and chunk size choice only applies to in-place defined - `NonlinearProblem`s. That is expected to change in the future. 
""" -struct TrustRegion{CS, AD, FDT, L, P, ST, CJ, MTR} <: - AbstractNewtonAlgorithm{CS, AD, FDT, ST, CJ} - linsolve::L - precs::P +@concrete struct TrustRegion{CJ, AD, MTR} <: AbstractNewtonAlgorithm{CJ, AD} + ad::AD + linsolve + precs radius_update_scheme::RadiusUpdateSchemes.T - max_trust_radius::MTR + max_trust_radius initial_trust_radius::MTR step_threshold::MTR shrink_threshold::MTR @@ -169,535 +143,477 @@ struct TrustRegion{CS, AD, FDT, L, P, ST, CJ, MTR} <: max_shrink_times::Int end -function TrustRegion(; chunk_size = Val{0}(), - autodiff = Val{true}(), - standardtag = Val{true}(), concrete_jac = nothing, - diff_type = Val{:forward}, linsolve = nothing, precs = DEFAULT_PRECS, +function TrustRegion(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS, radius_update_scheme::RadiusUpdateSchemes.T = RadiusUpdateSchemes.Simple, #defaults to conventional radius update - max_trust_radius::Real = 0 // 1, - initial_trust_radius::Real = 0 // 1, - step_threshold::Real = 1 // 10, - shrink_threshold::Real = 1 // 4, - expand_threshold::Real = 3 // 4, - shrink_factor::Real = 1 // 4, - expand_factor::Real = 2 // 1, - max_shrink_times::Int = 32) - TrustRegion{_unwrap_val(chunk_size), _unwrap_val(autodiff), diff_type, - typeof(linsolve), typeof(precs), _unwrap_val(standardtag), - _unwrap_val(concrete_jac), typeof(max_trust_radius), - }(linsolve, precs, radius_update_scheme, max_trust_radius, - initial_trust_radius, - step_threshold, - shrink_threshold, - expand_threshold, - shrink_factor, - expand_factor, - max_shrink_times) -end - -mutable struct TrustRegionCache{iip, fType, algType, uType, resType, pType, - INType, tolType, probType, ufType, L, jType, JC, floatType, - trustType, suType, su2Type, tmpType} - f::fType - alg::algType - u_prev::uType - u::uType - fu_prev::resType - fu::resType - p::pType - uf::ufType - linsolve::L - J::jType - jac_config::JC - force_stop::Bool - maxiters::Int - internalnorm::INType - retcode::SciMLBase.ReturnCode.T - abstol::tolType - prob::probType - radius_update_scheme::RadiusUpdateSchemes.T - trust_r::trustType - max_trust_r::trustType - step_threshold::suType - shrink_threshold::trustType - expand_threshold::trustType - shrink_factor::trustType - expand_factor::trustType - loss::floatType - loss_new::floatType - H::jType - g::resType - shrink_counter::Int - step_size::su2Type - u_tmp::tmpType - fu_new::resType - make_new_J::Bool - r::floatType - p1::floatType - p2::floatType - p3::floatType - p4::floatType - ϵ::floatType - stats::NLStats - - function TrustRegionCache{iip}(f::fType, alg::algType, u_prev::uType, u::uType, - fu_prev::resType, fu::resType, p::pType, - uf::ufType, linsolve::L, J::jType, jac_config::JC, - force_stop::Bool, maxiters::Int, internalnorm::INType, - retcode::SciMLBase.ReturnCode.T, abstol::tolType, - prob::probType, - radius_update_scheme::RadiusUpdateSchemes.T, - trust_r::trustType, - max_trust_r::trustType, step_threshold::suType, - shrink_threshold::trustType, expand_threshold::trustType, - shrink_factor::trustType, expand_factor::trustType, - loss::floatType, loss_new::floatType, H::jType, - g::resType, shrink_counter::Int, step_size::su2Type, - u_tmp::tmpType, fu_new::resType, make_new_J::Bool, - r::floatType, p1::floatType, p2::floatType, - p3::floatType, p4::floatType, ϵ::floatType, - stats::NLStats) where {iip, fType, algType, uType, - resType, pType, INType, - tolType, probType, ufType, L, - jType, JC, floatType, trustType, - suType, su2Type, tmpType} - new{iip, fType, algType, uType, resType, pType, - INType, 
tolType, probType, ufType, L, jType, JC, floatType, - trustType, suType, su2Type, tmpType}(f, alg, u_prev, u, fu_prev, fu, p, uf, - linsolve, J, - jac_config, force_stop, - maxiters, internalnorm, retcode, - abstol, prob, radius_update_scheme, - trust_r, max_trust_r, - step_threshold, shrink_threshold, - expand_threshold, shrink_factor, - expand_factor, loss, - loss_new, H, g, shrink_counter, - step_size, u_tmp, fu_new, - make_new_J, r, p1, p2, p3, p4, ϵ, stats) - end -end - -function jacobian_caches(alg::TrustRegion, f, u, p, ::Val{false}) - J = ArrayInterface.undefmatrix(u) - JacobianWrapper(f, p), nothing, J, zero(u), nothing -end - -function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion, - args...; - alias_u0 = false, - maxiters = 1000, - abstol = 1e-8, - internalnorm = DEFAULT_NORM, - kwargs...) where {uType, iip} - if alias_u0 - u = prob.u0 - else - u = deepcopy(prob.u0) - end - u_prev = zero(u) - f = prob.f - p = prob.p - if iip - fu = zero(u) - f(fu, u, p) - else - fu = f(u, p) - end - fu_prev = zero(fu) - - loss = get_loss(fu) - uf, linsolve, J, u_tmp, jac_config = jacobian_caches(alg, f, u, p, Val(iip)) - - radius_update_scheme = alg.radius_update_scheme - max_trust_radius = convert(eltype(u), alg.max_trust_radius) - initial_trust_radius = convert(eltype(u), alg.initial_trust_radius) - step_threshold = convert(eltype(u), alg.step_threshold) - shrink_threshold = convert(eltype(u), alg.shrink_threshold) - expand_threshold = convert(eltype(u), alg.expand_threshold) - shrink_factor = convert(eltype(u), alg.shrink_factor) - expand_factor = convert(eltype(u), alg.expand_factor) - # Set default trust region radius if not specified - if iszero(max_trust_radius) - max_trust_radius = convert(eltype(u), max(norm(fu), maximum(u) - minimum(u))) - end - if iszero(initial_trust_radius) - initial_trust_radius = convert(eltype(u), max_trust_radius / 11) - end - - loss_new = loss - H = ArrayInterface.undefmatrix(u) - g = zero(fu) - shrink_counter = 0 - step_size = zero(u) - fu_new = zero(fu) - make_new_J = true - r = loss - - # Parameters for the Schemes - p1 = convert(eltype(u), 0.0) - p2 = convert(eltype(u), 0.0) - p3 = convert(eltype(u), 0.0) - p4 = convert(eltype(u), 0.0) - ϵ = convert(eltype(u), 1.0e-8) - if radius_update_scheme === RadiusUpdateSchemes.Hei - step_threshold = convert(eltype(u), 0.0) - shrink_threshold = convert(eltype(u), 0.25) - expand_threshold = convert(eltype(u), 0.25) - p1 = convert(eltype(u), 5.0) # M - p2 = convert(eltype(u), 0.1) # β - p3 = convert(eltype(u), 0.15) # γ1 - p4 = convert(eltype(u), 0.15) # γ2 - initial_trust_radius = convert(eltype(u), 1.0) - elseif radius_update_scheme === RadiusUpdateSchemes.Yuan - step_threshold = convert(eltype(u), 0.0001) - shrink_threshold = convert(eltype(u), 0.25) - expand_threshold = convert(eltype(u), 0.25) - p1 = convert(eltype(u), 2.0) # μ - p2 = convert(eltype(u), 1 / 6) # c5 - p3 = convert(eltype(u), 6.0) # c6 - p4 = convert(eltype(u), 0.0) - if iip - auto_jacvec!(g, (fu, x) -> f(fu, x, p), u, fu) - else - if isa(u, Number) - g = ForwardDiff.derivative(x -> f(x, p), u) - else - g = auto_jacvec(x -> f(x, p), u, fu) - end - end - initial_trust_radius = convert(eltype(u), p1 * norm(g)) - elseif radius_update_scheme === RadiusUpdateSchemes.Fan - step_threshold = convert(eltype(u), 0.0001) - shrink_threshold = convert(eltype(u), 0.25) - expand_threshold = convert(eltype(u), 0.75) - p1 = convert(eltype(u), 0.1) # μ - p2 = convert(eltype(u), 1 / 4) # c5 - p3 = convert(eltype(u), 12) # c6 - p4 = 
convert(eltype(u), 1.0e18) # M - initial_trust_radius = convert(eltype(u), p1 * (norm(fu)^0.99)) - elseif radius_update_scheme === RadiusUpdateSchemes.Bastin - step_threshold = convert(eltype(u), 0.05) - shrink_threshold = convert(eltype(u), 0.05) - expand_threshold = convert(eltype(u), 0.9) - p1 = convert(eltype(u), 2.5) #alpha_1 - p2 = convert(eltype(u), 0.25) # alpha_2 - p3 = convert(eltype(u), 0) # not required - p4 = convert(eltype(u), 0) # not required - initial_trust_radius = convert(eltype(u), 1.0) - end - - return TrustRegionCache{iip}(f, alg, u_prev, u, fu_prev, fu, p, uf, linsolve, J, - jac_config, - false, maxiters, internalnorm, - ReturnCode.Default, abstol, prob, radius_update_scheme, - initial_trust_radius, - max_trust_radius, step_threshold, shrink_threshold, - expand_threshold, shrink_factor, expand_factor, loss, - loss_new, H, g, shrink_counter, step_size, u_tmp, fu_new, - make_new_J, r, p1, p2, p3, p4, ϵ, NLStats(1, 0, 0, 0, 0)) -end - -function perform_step!(cache::TrustRegionCache{true}) - @unpack make_new_J, J, fu, f, u, p, u_tmp, alg, linsolve = cache - if cache.make_new_J - jacobian!(J, cache) - mul!(cache.H, J, J) - mul!(cache.g, J, fu) - cache.stats.njacs += 1 - end - - linres = dolinsolve(alg.precs, linsolve, A = cache.H, b = _vec(cache.g), - linu = _vec(u_tmp), - p = p, reltol = cache.abstol) - cache.linsolve = linres.cache - cache.u_tmp .= -1 .* u_tmp - dogleg!(cache) - - # Compute the potentially new u - cache.u_tmp .= u .+ cache.step_size - f(cache.fu_new, cache.u_tmp, p) - trust_region_step!(cache) - cache.stats.nf += 1 - cache.stats.nsolve += 1 - cache.stats.nfactors += 1 - return nothing -end - -function perform_step!(cache::TrustRegionCache{false}) - @unpack make_new_J, fu, f, u, p = cache - - if make_new_J - J = jacobian(cache, f) - cache.H = J * J - cache.g = J * fu - cache.stats.njacs += 1 - end - - @unpack g, H = cache - # Compute the Newton step. - cache.u_tmp = -H \ g - dogleg!(cache) - - # Compute the potentially new u - cache.u_tmp = u .+ cache.step_size - cache.fu_new = f(cache.u_tmp, p) - trust_region_step!(cache) - cache.stats.nf += 1 - cache.stats.nsolve += 1 - cache.stats.nfactors += 1 - return nothing -end - -function retrospective_step!(cache::TrustRegionCache{true}) - @unpack J, fu_prev, fu, u_prev, u = cache - jacobian!(J, cache) - mul!(cache.H, J, J) - mul!(cache.g, J, fu) - cache.stats.njacs += 1 - @unpack H, g, step_size = cache - - return -(get_loss(fu_prev) - get_loss(fu)) / - (step_size' * g + step_size' * H * step_size / 2) -end - -function retrospective_step!(cache::TrustRegionCache{false}) - @unpack J, fu_prev, fu, u_prev, u, f = cache - J = jacobian(cache, f) - cache.H = J * J - cache.g = J * fu - cache.stats.njacs += 1 - @unpack H, g, step_size = cache - - return -(get_loss(fu_prev) - get_loss(fu)) / - (step_size' * g + step_size' * H * step_size / 2) + max_trust_radius::Real = 0 // 1, initial_trust_radius::Real = 0 // 1, + step_threshold::Real = 1 // 10, shrink_threshold::Real = 1 // 4, + expand_threshold::Real = 3 // 4, shrink_factor::Real = 1 // 4, + expand_factor::Real = 2 // 1, max_shrink_times::Int = 32, adkwargs...) + ad = default_adargs_to_adtype(adkwargs...) 
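    # Illustrative sketch (not part of this patch): the radius update scheme is still picked
    # from the `RadiusUpdateSchemes` enum above, while the AD backend now arrives through
    # `adkwargs...`, e.g. a hypothetical call:
    #   TrustRegion(; radius_update_scheme = RadiusUpdateSchemes.Hei,
    #       max_trust_radius = 2.0, autodiff = AutoForwardDiff())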
+ return TrustRegion{_unwrap_val(concrete_jac)}(ad, linsolve, precs, radius_update_scheme, + max_trust_radius, initial_trust_radius, step_threshold, shrink_threshold, + expand_threshold, shrink_factor, expand_factor, max_shrink_times) end -function trust_region_step!(cache::TrustRegionCache) - @unpack fu_new, step_size, g, H, loss, max_trust_r, radius_update_scheme = cache - cache.loss_new = get_loss(fu_new) - - # Compute the ratio of the actual reduction to the predicted reduction. - cache.r = -(loss - cache.loss_new) / (step_size' * g + step_size' * H * step_size / 2) - @unpack r = cache - - if radius_update_scheme === RadiusUpdateSchemes.Simple - # Update the trust region radius. - if r < cache.shrink_threshold - cache.trust_r *= cache.shrink_factor - cache.shrink_counter += 1 - else - cache.shrink_counter = 0 - end - if r > cache.step_threshold - take_step!(cache) - cache.loss = cache.loss_new - - # Update the trust region radius. - if r > cache.expand_threshold - cache.trust_r = min(cache.expand_factor * cache.trust_r, max_trust_r) - end - - cache.make_new_J = true - else - # No need to make a new J, no step was taken, so we try again with a smaller trust_r - cache.make_new_J = false - end - - if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol - cache.force_stop = true - end - - elseif radius_update_scheme === RadiusUpdateSchemes.Hei - if r > cache.step_threshold - take_step!(cache) - cache.loss = cache.loss_new - cache.make_new_J = true - else - cache.make_new_J = false - end - # Hei's radius update scheme - @unpack shrink_threshold, p1, p2, p3, p4 = cache - if rfunc(r, shrink_threshold, p1, p3, p4, p2) * cache.internalnorm(step_size) < - cache.trust_r - cache.shrink_counter += 1 - else - cache.shrink_counter = 0 - end - cache.trust_r = rfunc(r, shrink_threshold, p1, p3, p4, p2) * - cache.internalnorm(step_size) - - if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol || - cache.internalnorm(g) < cache.ϵ - cache.force_stop = true - end - - elseif radius_update_scheme === RadiusUpdateSchemes.Yuan - if r < cache.shrink_threshold - cache.p1 = cache.p2 * cache.p1 - cache.shrink_counter += 1 - elseif r >= cache.expand_threshold && - cache.internalnorm(step_size) > cache.trust_r / 2 - cache.p1 = cache.p3 * cache.p1 - cache.shrink_counter = 0 - end - - if r > cache.step_threshold - take_step!(cache) - cache.loss = cache.loss_new - cache.make_new_J = true - else - cache.make_new_J = false - end - - @unpack p1 = cache - cache.trust_r = p1 * cache.internalnorm(jvp!(cache)) - if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol || - cache.internalnorm(g) < cache.ϵ - cache.force_stop = true - end - #Fan's update scheme - elseif radius_update_scheme === RadiusUpdateSchemes.Fan - if r < cache.shrink_threshold - cache.p1 *= cache.p2 - cache.shrink_counter += 1 - elseif r > cache.expand_threshold - cache.p1 = min(cache.p1 * cache.p3, cache.p4) - cache.shrink_counter = 0 - end - - if r > cache.step_threshold - take_step!(cache) - cache.loss = cache.loss_new - cache.make_new_J = true - else - cache.make_new_J = false - end - - @unpack p1 = cache - cache.trust_r = p1 * (cache.internalnorm(cache.fu)^0.99) - if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol || - cache.internalnorm(g) < cache.ϵ - cache.force_stop = true - end - elseif radius_update_scheme === RadiusUpdateSchemes.Bastin - if r > cache.step_threshold - take_step!(cache) - cache.loss = cache.loss_new - cache.make_new_J = true - if retrospective_step!(cache) >= 
cache.expand_threshold - cache.trust_r = max(cache.p1 * cache.internalnorm(step_size), cache.trust_r) - end - - else - cache.make_new_J = false - cache.trust_r *= cache.p2 - cache.shrink_counter += 1 - end - if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol - cache.force_stop = true - end - end -end - -function dogleg!(cache::TrustRegionCache) - @unpack u_tmp, trust_r = cache - - # Test if the full step is within the trust region. - if norm(u_tmp) ≤ trust_r - cache.step_size = deepcopy(u_tmp) - return - end - - # Calcualte Cauchy point, optimum along the steepest descent direction. - δsd = -cache.g - norm_δsd = norm(δsd) - if norm_δsd ≥ trust_r - cache.step_size = δsd .* trust_r / norm_δsd - return - end - - # Find the intersection point on the boundary. - N_sd = u_tmp - δsd - dot_N_sd = dot(N_sd, N_sd) - dot_sd_N_sd = dot(δsd, N_sd) - dot_sd = dot(δsd, δsd) - fact = dot_sd_N_sd^2 - dot_N_sd * (dot_sd - trust_r^2) - τ = (-dot_sd_N_sd + sqrt(fact)) / dot_N_sd - cache.step_size = δsd + τ * N_sd -end - -function take_step!(cache::TrustRegionCache{true}) - cache.u_prev .= cache.u - cache.u .= cache.u_tmp - cache.fu_prev .= cache.fu - cache.fu .= cache.fu_new -end - -function take_step!(cache::TrustRegionCache{false}) - cache.u_prev = cache.u - cache.u = cache.u_tmp - cache.fu_prev = cache.fu - cache.fu = cache.fu_new -end - -function jvp!(cache::TrustRegionCache{false}) - @unpack f, u, fu, p = cache - if isa(u, Number) - return value_derivative(x -> f(x, p), u) - end - return auto_jacvec(x -> f(x, p), u, fu) -end - -function jvp!(cache::TrustRegionCache{true}) - @unpack g, f, u, fu, p = cache - if isa(u, Number) - return value_derivative(x -> f(x, p), u) - end - auto_jacvec!(g, (fu, x) -> f(fu, x, p), u, fu) - g -end - -function SciMLBase.solve!(cache::TrustRegionCache) - while !cache.force_stop && cache.stats.nsteps < cache.maxiters && - cache.shrink_counter < cache.alg.max_shrink_times - perform_step!(cache) - cache.stats.nsteps += 1 - end - - if cache.stats.nsteps == cache.maxiters - cache.retcode = ReturnCode.MaxIters - else - cache.retcode = ReturnCode.Success - end - - SciMLBase.build_solution(cache.prob, cache.alg, cache.u, cache.fu; - retcode = cache.retcode, stats = cache.stats) -end - -function SciMLBase.reinit!(cache::TrustRegionCache{iip}, u0 = cache.u; p = cache.p, - abstol = cache.abstol, maxiters = cache.maxiters) where {iip} - cache.p = p - if iip - recursivecopy!(cache.u, u0) - cache.f(cache.fu, cache.u, p) - else - # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter - cache.u = u0 - cache.fu = cache.f(cache.u, p) - end - cache.abstol = abstol - cache.maxiters = maxiters - cache.stats.nf = 1 - cache.stats.nsteps = 1 - cache.force_stop = false - cache.retcode = ReturnCode.Default - cache.make_new_J = true - cache.loss = get_loss(cache.fu) - cache.shrink_counter = 0 - cache.trust_r = convert(eltype(cache.u), cache.alg.initial_trust_radius) - if iszero(cache.trust_r) - cache.trust_r = convert(eltype(cache.u), cache.max_trust_r / 11) - end - return cache -end +# @concrete mutable struct TrustRegionCache{iip} +# f +# alg +# u_prev::uType +# u::uType +# fu_prev::resType +# fu::resType +# p +# uf +# linsolve +# J::jType +# jac_cache +# force_stop::Bool +# maxiters::Int +# internalnorm +# retcode::ReturnCode.T +# abstol +# prob +# radius_update_scheme::RadiusUpdateSchemes.T +# trust_r::trustType +# max_trust_r::trustType +# step_threshold +# shrink_threshold::trustType +# expand_threshold::trustType +# shrink_factor::trustType +# 
expand_factor::trustType +# loss::floatType +# loss_new::floatType +# H::jType +# g::resType +# shrink_counter::Int +# step_size +# u_tmp +# fu_new::resType +# make_new_J::Bool +# r::floatType +# p1::floatType +# p2::floatType +# p3::floatType +# p4::floatType +# ϵ::floatType +# stats::NLStats +# end + +# function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion, +# args...; +# alias_u0 = false, +# maxiters = 1000, +# abstol = 1e-8, +# internalnorm = DEFAULT_NORM, +# kwargs...) where {uType, iip} +# if alias_u0 +# u = prob.u0 +# else +# u = deepcopy(prob.u0) +# end +# u_prev = zero(u) +# f = prob.f +# p = prob.p +# if iip +# fu = zero(u) +# f(fu, u, p) +# else +# fu = f(u, p) +# end +# fu_prev = zero(fu) + +# loss = get_loss(fu) +# uf, linsolve, J, u_tmp, jac_config = jacobian_caches(alg, f, u, p, Val(iip)) + +# radius_update_scheme = alg.radius_update_scheme +# max_trust_radius = convert(eltype(u), alg.max_trust_radius) +# initial_trust_radius = convert(eltype(u), alg.initial_trust_radius) +# step_threshold = convert(eltype(u), alg.step_threshold) +# shrink_threshold = convert(eltype(u), alg.shrink_threshold) +# expand_threshold = convert(eltype(u), alg.expand_threshold) +# shrink_factor = convert(eltype(u), alg.shrink_factor) +# expand_factor = convert(eltype(u), alg.expand_factor) +# # Set default trust region radius if not specified +# if iszero(max_trust_radius) +# max_trust_radius = convert(eltype(u), max(norm(fu), maximum(u) - minimum(u))) +# end +# if iszero(initial_trust_radius) +# initial_trust_radius = convert(eltype(u), max_trust_radius / 11) +# end + +# loss_new = loss +# H = ArrayInterface.undefmatrix(u) +# g = zero(fu) +# shrink_counter = 0 +# step_size = zero(u) +# fu_new = zero(fu) +# make_new_J = true +# r = loss + +# # Parameters for the Schemes +# p1 = convert(eltype(u), 0.0) +# p2 = convert(eltype(u), 0.0) +# p3 = convert(eltype(u), 0.0) +# p4 = convert(eltype(u), 0.0) +# ϵ = convert(eltype(u), 1.0e-8) +# if radius_update_scheme === RadiusUpdateSchemes.Hei +# step_threshold = convert(eltype(u), 0.0) +# shrink_threshold = convert(eltype(u), 0.25) +# expand_threshold = convert(eltype(u), 0.25) +# p1 = convert(eltype(u), 5.0) # M +# p2 = convert(eltype(u), 0.1) # β +# p3 = convert(eltype(u), 0.15) # γ1 +# p4 = convert(eltype(u), 0.15) # γ2 +# initial_trust_radius = convert(eltype(u), 1.0) +# elseif radius_update_scheme === RadiusUpdateSchemes.Yuan +# step_threshold = convert(eltype(u), 0.0001) +# shrink_threshold = convert(eltype(u), 0.25) +# expand_threshold = convert(eltype(u), 0.25) +# p1 = convert(eltype(u), 2.0) # μ +# p2 = convert(eltype(u), 1 / 6) # c5 +# p3 = convert(eltype(u), 6.0) # c6 +# p4 = convert(eltype(u), 0.0) +# if iip +# auto_jacvec!(g, (fu, x) -> f(fu, x, p), u, fu) +# else +# if isa(u, Number) +# g = ForwardDiff.derivative(x -> f(x, p), u) +# else +# g = auto_jacvec(x -> f(x, p), u, fu) +# end +# end +# initial_trust_radius = convert(eltype(u), p1 * norm(g)) +# elseif radius_update_scheme === RadiusUpdateSchemes.Fan +# step_threshold = convert(eltype(u), 0.0001) +# shrink_threshold = convert(eltype(u), 0.25) +# expand_threshold = convert(eltype(u), 0.75) +# p1 = convert(eltype(u), 0.1) # μ +# p2 = convert(eltype(u), 1 / 4) # c5 +# p3 = convert(eltype(u), 12) # c6 +# p4 = convert(eltype(u), 1.0e18) # M +# initial_trust_radius = convert(eltype(u), p1 * (norm(fu)^0.99)) +# elseif radius_update_scheme === RadiusUpdateSchemes.Bastin +# step_threshold = convert(eltype(u), 0.05) +# shrink_threshold = convert(eltype(u), 0.05) +# 
expand_threshold = convert(eltype(u), 0.9) +# p1 = convert(eltype(u), 2.5) #alpha_1 +# p2 = convert(eltype(u), 0.25) # alpha_2 +# p3 = convert(eltype(u), 0) # not required +# p4 = convert(eltype(u), 0) # not required +# initial_trust_radius = convert(eltype(u), 1.0) +# end + +# return TrustRegionCache{iip}(f, alg, u_prev, u, fu_prev, fu, p, uf, linsolve, J, +# jac_config, +# false, maxiters, internalnorm, +# ReturnCode.Default, abstol, prob, radius_update_scheme, +# initial_trust_radius, +# max_trust_radius, step_threshold, shrink_threshold, +# expand_threshold, shrink_factor, expand_factor, loss, +# loss_new, H, g, shrink_counter, step_size, u_tmp, fu_new, +# make_new_J, r, p1, p2, p3, p4, ϵ, NLStats(1, 0, 0, 0, 0)) +# end + +# function perform_step!(cache::TrustRegionCache{true}) +# @unpack make_new_J, J, fu, f, u, p, u_tmp, alg, linsolve = cache +# if cache.make_new_J +# jacobian!(J, cache) +# mul!(cache.H, J, J) +# mul!(cache.g, J, fu) +# cache.stats.njacs += 1 +# end + +# linres = dolinsolve(alg.precs, linsolve, A = cache.H, b = _vec(cache.g), +# linu = _vec(u_tmp), +# p = p, reltol = cache.abstol) +# cache.linsolve = linres.cache +# cache.u_tmp .= -1 .* u_tmp +# dogleg!(cache) + +# # Compute the potentially new u +# cache.u_tmp .= u .+ cache.step_size +# f(cache.fu_new, cache.u_tmp, p) +# trust_region_step!(cache) +# cache.stats.nf += 1 +# cache.stats.nsolve += 1 +# cache.stats.nfactors += 1 +# return nothing +# end + +# function perform_step!(cache::TrustRegionCache{false}) +# @unpack make_new_J, fu, f, u, p = cache + +# if make_new_J +# J = jacobian(cache, f) +# cache.H = J * J +# cache.g = J * fu +# cache.stats.njacs += 1 +# end + +# @unpack g, H = cache +# # Compute the Newton step. +# cache.u_tmp = -H \ g +# dogleg!(cache) + +# # Compute the potentially new u +# cache.u_tmp = u .+ cache.step_size +# cache.fu_new = f(cache.u_tmp, p) +# trust_region_step!(cache) +# cache.stats.nf += 1 +# cache.stats.nsolve += 1 +# cache.stats.nfactors += 1 +# return nothing +# end + +# function retrospective_step!(cache::TrustRegionCache{true}) +# @unpack J, fu_prev, fu, u_prev, u = cache +# jacobian!(J, cache) +# mul!(cache.H, J, J) +# mul!(cache.g, J, fu) +# cache.stats.njacs += 1 +# @unpack H, g, step_size = cache + +# return -(get_loss(fu_prev) - get_loss(fu)) / +# (step_size' * g + step_size' * H * step_size / 2) +# end + +# function retrospective_step!(cache::TrustRegionCache{false}) +# @unpack J, fu_prev, fu, u_prev, u, f = cache +# J = jacobian(cache, f) +# cache.H = J * J +# cache.g = J * fu +# cache.stats.njacs += 1 +# @unpack H, g, step_size = cache + +# return -(get_loss(fu_prev) - get_loss(fu)) / +# (step_size' * g + step_size' * H * step_size / 2) +# end + +# function trust_region_step!(cache::TrustRegionCache) +# @unpack fu_new, step_size, g, H, loss, max_trust_r, radius_update_scheme = cache +# cache.loss_new = get_loss(fu_new) + +# # Compute the ratio of the actual reduction to the predicted reduction. +# cache.r = -(loss - cache.loss_new) / (step_size' * g + step_size' * H * step_size / 2) +# @unpack r = cache + +# if radius_update_scheme === RadiusUpdateSchemes.Simple +# # Update the trust region radius. +# if r < cache.shrink_threshold +# cache.trust_r *= cache.shrink_factor +# cache.shrink_counter += 1 +# else +# cache.shrink_counter = 0 +# end +# if r > cache.step_threshold +# take_step!(cache) +# cache.loss = cache.loss_new + +# # Update the trust region radius. 
+# if r > cache.expand_threshold +# cache.trust_r = min(cache.expand_factor * cache.trust_r, max_trust_r) +# end + +# cache.make_new_J = true +# else +# # No need to make a new J, no step was taken, so we try again with a smaller trust_r +# cache.make_new_J = false +# end + +# if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol +# cache.force_stop = true +# end + +# elseif radius_update_scheme === RadiusUpdateSchemes.Hei +# if r > cache.step_threshold +# take_step!(cache) +# cache.loss = cache.loss_new +# cache.make_new_J = true +# else +# cache.make_new_J = false +# end +# # Hei's radius update scheme +# @unpack shrink_threshold, p1, p2, p3, p4 = cache +# if rfunc(r, shrink_threshold, p1, p3, p4, p2) * cache.internalnorm(step_size) < +# cache.trust_r +# cache.shrink_counter += 1 +# else +# cache.shrink_counter = 0 +# end +# cache.trust_r = rfunc(r, shrink_threshold, p1, p3, p4, p2) * +# cache.internalnorm(step_size) + +# if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol || +# cache.internalnorm(g) < cache.ϵ +# cache.force_stop = true +# end + +# elseif radius_update_scheme === RadiusUpdateSchemes.Yuan +# if r < cache.shrink_threshold +# cache.p1 = cache.p2 * cache.p1 +# cache.shrink_counter += 1 +# elseif r >= cache.expand_threshold && +# cache.internalnorm(step_size) > cache.trust_r / 2 +# cache.p1 = cache.p3 * cache.p1 +# cache.shrink_counter = 0 +# end + +# if r > cache.step_threshold +# take_step!(cache) +# cache.loss = cache.loss_new +# cache.make_new_J = true +# else +# cache.make_new_J = false +# end + +# @unpack p1 = cache +# cache.trust_r = p1 * cache.internalnorm(jvp!(cache)) +# if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol || +# cache.internalnorm(g) < cache.ϵ +# cache.force_stop = true +# end +# #Fan's update scheme +# elseif radius_update_scheme === RadiusUpdateSchemes.Fan +# if r < cache.shrink_threshold +# cache.p1 *= cache.p2 +# cache.shrink_counter += 1 +# elseif r > cache.expand_threshold +# cache.p1 = min(cache.p1 * cache.p3, cache.p4) +# cache.shrink_counter = 0 +# end + +# if r > cache.step_threshold +# take_step!(cache) +# cache.loss = cache.loss_new +# cache.make_new_J = true +# else +# cache.make_new_J = false +# end + +# @unpack p1 = cache +# cache.trust_r = p1 * (cache.internalnorm(cache.fu)^0.99) +# if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol || +# cache.internalnorm(g) < cache.ϵ +# cache.force_stop = true +# end +# elseif radius_update_scheme === RadiusUpdateSchemes.Bastin +# if r > cache.step_threshold +# take_step!(cache) +# cache.loss = cache.loss_new +# cache.make_new_J = true +# if retrospective_step!(cache) >= cache.expand_threshold +# cache.trust_r = max(cache.p1 * cache.internalnorm(step_size), cache.trust_r) +# end + +# else +# cache.make_new_J = false +# cache.trust_r *= cache.p2 +# cache.shrink_counter += 1 +# end +# if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol +# cache.force_stop = true +# end +# end +# end + +# function dogleg!(cache::TrustRegionCache) +# @unpack u_tmp, trust_r = cache + +# # Test if the full step is within the trust region. +# if norm(u_tmp) ≤ trust_r +# cache.step_size = deepcopy(u_tmp) +# return +# end + +# # Calcualte Cauchy point, optimum along the steepest descent direction. +# δsd = -cache.g +# norm_δsd = norm(δsd) +# if norm_δsd ≥ trust_r +# cache.step_size = δsd .* trust_r / norm_δsd +# return +# end + +# # Find the intersection point on the boundary. 
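+# # The boundary point satisfies norm(δsd + τ * N_sd) == trust_r with N_sd = u_tmp - δsd;
+# # expanding gives a quadratic in τ whose positive root (fact below is its reduced
+# # discriminant) yields the dogleg step δsd + τ * N_sd.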
+# N_sd = u_tmp - δsd +# dot_N_sd = dot(N_sd, N_sd) +# dot_sd_N_sd = dot(δsd, N_sd) +# dot_sd = dot(δsd, δsd) +# fact = dot_sd_N_sd^2 - dot_N_sd * (dot_sd - trust_r^2) +# τ = (-dot_sd_N_sd + sqrt(fact)) / dot_N_sd +# cache.step_size = δsd + τ * N_sd +# end + +# function take_step!(cache::TrustRegionCache{true}) +# cache.u_prev .= cache.u +# cache.u .= cache.u_tmp +# cache.fu_prev .= cache.fu +# cache.fu .= cache.fu_new +# end + +# function take_step!(cache::TrustRegionCache{false}) +# cache.u_prev = cache.u +# cache.u = cache.u_tmp +# cache.fu_prev = cache.fu +# cache.fu = cache.fu_new +# end + +# function jvp!(cache::TrustRegionCache{false}) +# @unpack f, u, fu, p = cache +# if isa(u, Number) +# return value_derivative(x -> f(x, p), u) +# end +# return auto_jacvec(x -> f(x, p), u, fu) +# end + +# function jvp!(cache::TrustRegionCache{true}) +# @unpack g, f, u, fu, p = cache +# if isa(u, Number) +# return value_derivative(x -> f(x, p), u) +# end +# auto_jacvec!(g, (fu, x) -> f(fu, x, p), u, fu) +# g +# end + +# function SciMLBase.solve!(cache::TrustRegionCache) +# while !cache.force_stop && cache.stats.nsteps < cache.maxiters && +# cache.shrink_counter < cache.alg.max_shrink_times +# perform_step!(cache) +# cache.stats.nsteps += 1 +# end + +# if cache.stats.nsteps == cache.maxiters +# cache.retcode = ReturnCode.MaxIters +# else +# cache.retcode = ReturnCode.Success +# end + +# SciMLBase.build_solution(cache.prob, cache.alg, cache.u, cache.fu; +# retcode = cache.retcode, stats = cache.stats) +# end + +# function SciMLBase.reinit!(cache::TrustRegionCache{iip}, u0 = cache.u; p = cache.p, +# abstol = cache.abstol, maxiters = cache.maxiters) where {iip} +# cache.p = p +# if iip +# recursivecopy!(cache.u, u0) +# cache.f(cache.fu, cache.u, p) +# else +# # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter +# cache.u = u0 +# cache.fu = cache.f(cache.u, p) +# end +# cache.abstol = abstol +# cache.maxiters = maxiters +# cache.stats.nf = 1 +# cache.stats.nsteps = 1 +# cache.force_stop = false +# cache.retcode = ReturnCode.Default +# cache.make_new_J = true +# cache.loss = get_loss(cache.fu) +# cache.shrink_counter = 0 +# cache.trust_r = convert(eltype(cache.u), cache.alg.initial_trust_radius) +# if iszero(cache.trust_r) +# cache.trust_r = convert(eltype(cache.u), cache.max_trust_r / 11) +# end +# return cache +# end diff --git a/src/utils.jl b/src/utils.jl index 9d72e230f..c50d52ad7 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -2,88 +2,64 @@ @inline UNITLESS_ABS2(x) = real(abs2(x)) @inline DEFAULT_NORM(u::Union{AbstractFloat, Complex}) = @fastmath abs(u) @inline function DEFAULT_NORM(u::Array{T}) where {T <: Union{AbstractFloat, Complex}} - sqrt(real(sum(abs2, u)) / length(u)) + return sqrt(real(sum(abs2, u)) / length(u)) end -@inline function DEFAULT_NORM(u::StaticArraysCore.StaticArray{ - T, -}) where { - T <: Union{ - AbstractFloat, - Complex}} - sqrt(real(sum(abs2, u)) / length(u)) +@inline function DEFAULT_NORM(u::StaticArray{<:Union{AbstractFloat, Complex}}) + return sqrt(real(sum(abs2, u)) / length(u)) end -@inline function DEFAULT_NORM(u::RecursiveArrayTools.AbstractVectorOfArray) - sum(sqrt(real(sum(UNITLESS_ABS2, _u)) / length(_u)) for _u in u.u) +@inline function DEFAULT_NORM(u::AbstractVectorOfArray) + return sum(sqrt(real(sum(UNITLESS_ABS2, _u)) / length(_u)) for _u in u.u) end @inline DEFAULT_NORM(u::AbstractArray) = sqrt(real(sum(UNITLESS_ABS2, u)) / length(u)) @inline DEFAULT_NORM(u) = norm(u) -alg_autodiff(alg::AbstractNewtonAlgorithm{CS, AD}) 
where {CS, AD} = AD +alg_autodiff(alg::AbstractNewtonAlgorithm{<:AbstractFiniteDifferencesMode}) = false +alg_autodiff(alg::AbstractNewtonAlgorithm) = true alg_autodiff(alg) = false """ -value_derivative(f, x) + default_adargs_to_adtype(; chunk_size = Val{0}(), autodiff = Val{true}(), + standardtag = Val{true}(), diff_type = Val{:forward}) -Compute `f(x), d/dx f(x)` in the most efficient way. +Construct the AD type from the arguments. This is mostly needed for compatibility with older +code. """ -function value_derivative(f::F, x::R) where {F, R} - T = typeof(ForwardDiff.Tag(f, R)) - out = f(ForwardDiff.Dual{T}(x, one(x))) - ForwardDiff.value(out), ForwardDiff.extract_derivative(T, out) +function default_adargs_to_adtype(; chunk_size = Val{0}(), autodiff = Val{true}(), + standardtag = Val{true}(), diff_type = Val{:forward}()) + ad = _unwrap_val(autodiff) + # Old API + if ad isa Bool + # FIXME: standardtag is not the Tag + ad && return AutoForwardDiff(; chunksize = _unwrap_val(chunk_size), + tag = _unwrap_val(standardtag)) + return AutoFiniteDiff(; fdtype = diff_type) + end + return ad end -# Todo: improve this dispatch -function value_derivative(f::F, x::StaticArraysCore.SVector) where {F} - f(x), ForwardDiff.jacobian(f, x) -end +# """ +# value_derivative(f, x) -value(x) = x -value(x::Dual) = ForwardDiff.value(x) -value(x::AbstractArray{<:Dual}) = map(ForwardDiff.value, x) - -_vec(v) = vec(v) -_vec(v::Number) = v -_vec(v::AbstractVector) = v - -function alg_difftype(alg::AbstractNewtonAlgorithm{ - CS, - AD, - FDT, - ST, - CJ, -}) where {CS, AD, FDT, ST, CJ} - FDT -end +# Compute `f(x), d/dx f(x)` in the most efficient way. +# """ +# function value_derivative(f::F, x::R) where {F, R} +# T = typeof(ForwardDiff.Tag(f, R)) +# out = f(ForwardDiff.Dual{T}(x, one(x))) +# ForwardDiff.value(out), ForwardDiff.extract_derivative(T, out) +# end -function concrete_jac(alg::AbstractNewtonAlgorithm{ - CS, - AD, - FDT, - ST, - CJ, -}) where {CS, AD, FDT, ST, CJ} - CJ -end +# # Todo: improve this dispatch +# function value_derivative(f::F, x::StaticArraysCore.SVector) where {F} +# f(x), ForwardDiff.jacobian(f, x) +# end -function get_chunksize(alg::AbstractNewtonAlgorithm{ - CS, - AD, - FDT, - ST, - CJ, -}) where {CS, AD, FDT, ST, CJ} - Val(CS) -end +@inline value(x) = x +@inline value(x::Dual) = ForwardDiff.value(x) +@inline value(x::AbstractArray{<:Dual}) = map(ForwardDiff.value, x) -function standardtag(alg::AbstractNewtonAlgorithm{ - CS, - AD, - FDT, - ST, - CJ, -}) where {CS, AD, FDT, ST, CJ} - ST -end +@inline _vec(v) = vec(v) +@inline _vec(v::Number) = v +@inline _vec(v::AbstractVector) = v DEFAULT_PRECS(W, du, u, p, t, newW, Plprev, Prprev, cachedata) = nothing, nothing @@ -94,10 +70,8 @@ function dolinsolve(precs::P, linsolve; A = nothing, linu = nothing, b = nothing b !== nothing && (linsolve.b = b) linu !== nothing && (linsolve.u = linu) - Plprev = linsolve.Pl isa LinearSolve.ComposePreconditioner ? linsolve.Pl.outer : - linsolve.Pl - Prprev = linsolve.Pr isa LinearSolve.ComposePreconditioner ? linsolve.Pr.outer : - linsolve.Pr + Plprev = linsolve.Pl isa ComposePreconditioner ? linsolve.Pl.outer : linsolve.Pl + Prprev = linsolve.Pr isa ComposePreconditioner ? 
linsolve.Pr.outer : linsolve.Pr _Pl, _Pr = precs(linsolve.A, du, u, p, nothing, A !== nothing, Plprev, Prprev, cachedata) @@ -110,29 +84,25 @@ function dolinsolve(precs::P, linsolve; A = nothing, linu = nothing, b = nothing linsolve.Pr = Pr end - linres = if reltol === nothing - solve!(linsolve) - else - solve!(linsolve; reltol) - end + linres = reltol === nothing ? solve!(linsolve) : solve!(linsolve; reltol) return linres end function wrapprecs(_Pl, _Pr, weight) if _Pl !== nothing - Pl = LinearSolve.ComposePreconditioner(LinearSolve.InvPreconditioner(Diagonal(_vec(weight))), - _Pl) + Pl = ComposePreconditioner(InvPreconditioner(Diagonal(_vec(weight))), _Pl) else - Pl = LinearSolve.InvPreconditioner(Diagonal(_vec(weight))) + Pl = InvPreconditioner(Diagonal(_vec(weight))) end if _Pr !== nothing - Pr = LinearSolve.ComposePreconditioner(Diagonal(_vec(weight)), _Pr) + Pr = ComposePreconditioner(Diagonal(_vec(weight)), _Pr) else Pr = Diagonal(_vec(weight)) end - Pl, Pr + + return Pl, Pr end function _nfcount(N, ::Type{diff_type}) where {diff_type} @@ -143,17 +113,18 @@ function _nfcount(N, ::Type{diff_type}) where {diff_type} else tmp = 2N end - tmp + return tmp end -function get_loss(fu) - return norm(fu)^2 / 2 -end +get_loss(fu) = norm(fu)^2 / 2 function rfunc(r::R, c2::R, M::R, γ1::R, γ2::R, β::R) where {R <: Real} # R-function for adaptive trust region method - if (r >= c2) + if (r ≥ c2) return (2 * (M - 1 - γ2) * atan(r - c2) + (1 + γ2)) / π else return (1 - γ1 - β) * (exp(r - c2) + β / (1 - γ1 - β)) end end + +concrete_jac(_) = nothing +concrete_jac(::AbstractNewtonAlgorithm{CJ}) where {CJ} = CJ
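For reference, a minimal usage sketch of the reworked keyword API once the commented-out TrustRegion cache/solve machinery above is restored. This assumes `radius_update_scheme` and `autodiff` remain keyword arguments of the `TrustRegion` constructor (as in the signature shown earlier) and that `RadiusUpdateSchemes` and the ADTypes backends keep their current names; the problem below is purely illustrative.

using NonlinearSolve  # ADTypes backends are re-exported by NonlinearSolve

# Either pass an ADTypes backend directly ...
alg = TrustRegion(; autodiff = AutoForwardDiff(),
    radius_update_scheme = RadiusUpdateSchemes.Hei)

# ... or keep using the legacy Val-based keywords, which
# `default_adargs_to_adtype` converts into a backend internally.
alg_legacy = TrustRegion(; autodiff = Val(true))

f(u, p) = u .^ 3 .- p                      # out-of-place residual
prob = NonlinearProblem{false}(f, [1.0, 1.2], 2.0)
sol = solve(prob, alg; abstol = 1e-8)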