Avoid mutation on user x #280

Merged: 1 commit, Dec 29, 2023
20 changes: 12 additions & 8 deletions README.md
@@ -282,7 +282,7 @@ H = numauto_color_hessian(f, x, colorvec, sparsity)
numauto_color_hessian!(H, f, x, colorvec, sparsity)
```

To avoid unnecessary allocations every time the Hessian is computed,
construct a `ForwardColorHesCache` beforehand:

```julia
@@ -292,7 +292,7 @@ numauto_color_hessian!(H, f, x, hescache)

By default, these methods use a mix of numerical and automatic differentiation,
namely by taking finite differences of gradients calculated via ForwardDiff.jl.
Alternatively, if you have your own custom gradient function `g!`, you can specify
it as an argument to `ForwardColorHesCache`:

```julia
@@ -301,7 +301,6 @@ hescache = ForwardColorHesCache(f, x, colorvec, sparsity, g!)
Note that any user-defined gradient needs to have the signature `g!(G, x)`,
i.e. updating the gradient `G` in place.
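
For illustration, here is a minimal sketch of wiring a custom in-place gradient into the cache. The objective `f`, the diagonal sparsity pattern, and the ForwardDiff-based `g!` are assumptions made for this example, not part of the package docs:

```julia
using SparseDiffTools, ForwardDiff, SparseArrays, LinearAlgebra

f(x) = sum(abs2, x)                          # hypothetical objective with a diagonal Hessian
g!(G, x) = ForwardDiff.gradient!(G, f, x)    # custom in-place gradient with signature g!(G, x)

x = rand(4)
sparsity = sparse(Matrix(1.0I, 4, 4))        # assumed diagonal sparsity pattern
colorvec = matrix_colors(sparsity)
hescache = ForwardColorHesCache(f, x, colorvec, sparsity, g!)

H = numauto_color_hessian(f, x, colorvec, sparsity)   # allocating form
numauto_color_hessian!(H, f, x, hescache)              # in-place form reusing the cache and g!
```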


### Jacobian-Vector and Hessian-Vector Products

Matrix-free implementations of Jacobian-Vector and Hessian-Vector products are
@@ -322,7 +321,8 @@ auto_jacvec(f, x, v)

# If compute_f0 is false, `cache1` is assumed to already hold `f(x)`; otherwise `f(cache1, x)` is called first
num_jacvec!(dy,f,x,v,cache1 = similar(v),
-cache2 = similar(v);
+cache2 = similar(v),
+cache3 = similar(v);
compute_f0 = true)
num_jacvec(f,x,v,f0=nothing)
```
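
As a calling sketch (the in-place map `f!` and the sizes are made up; every cache argument defaults to `similar(v)`, so the extra `cache3` added here does not change existing call sites):

```julia
using SparseDiffTools

f!(dy, x) = (dy .= x .^ 2)             # hypothetical in-place map R^n -> R^n
x = rand(5); v = rand(5); dy = similar(v)

num_jacvec!(dy, f!, x, v)              # J(x)*v by finite differences, default caches
Jv = num_jacvec(x -> x .^ 2, x, v)     # out-of-place variant
```
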
@@ -333,14 +333,16 @@ For Hessians, the following are provided:
num_hesvec!(dy,f,x,v,
cache1 = similar(v),
cache2 = similar(v),
-cache3 = similar(v))
+cache3 = similar(v),
+cache4 = similar(v))

num_hesvec(f,x,v)

numauto_hesvec!(dy,f,x,v,
cache = ForwardDiff.GradientConfig(f,v),
cache1 = similar(v),
-cache2 = similar(v))
+cache2 = similar(v),
+cache3 = similar(v))

numauto_hesvec(f,x,v)

@@ -358,6 +360,7 @@ respectively:

```julia
num_hesvecgrad!(dy,g,x,v,
+cache1 = similar(v),
cache2 = similar(v),
cache3 = similar(v))

@@ -384,7 +387,8 @@ using Zygote # Required

numback_hesvec!(dy,f,x,v,
cache1 = similar(v),
-cache2 = similar(v))
+cache2 = similar(v),
+cache3 = similar(v))

numback_hesvec(f,x,v)

@@ -396,7 +400,7 @@ autoback_hesvec!(dy,f,x,v,
autoback_hesvec(f,x,v)
```

-#### J*v and H*v Operators
+#### `J*v` and `H*v` Operators

The following produce matrix-free operators which are used for calculating
Jacobian-vector and Hessian-vector products where the differentiation takes
22 changes: 13 additions & 9 deletions docs/src/index.md
@@ -244,7 +244,7 @@ forwarddiff_color_jacobian!(J::AbstractMatrix{<:Number},
`dx` is a pre-allocated output vector which is used to declare the output size,
and thus allows for specifying a non-square Jacobian.

Also, it is possible to retrieve the function value via `value(jac_cache)` or
`value!(result, jac_cache)`
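
For example (a sketch only; the residual function and the keyword form of `ForwardColorJacCache` used here are assumptions for illustration):

```julia
using SparseDiffTools

f!(dx, x) = (dx .= x .^ 2)                                # hypothetical in-place residual
x = rand(4)
jac_cache = ForwardColorJacCache(f!, x; dx = similar(x))  # assumed keyword constructor
J = zeros(4, 4)
forwarddiff_color_jacobian!(J, f!, x, jac_cache)
fx = value(jac_cache)                                     # function value reused from the cache
```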


@@ -276,7 +276,7 @@ H = numauto_color_hessian(f, x, colorvec, sparsity)
numauto_color_hessian!(H, f, x, colorvec, sparsity)
```

To avoid unnecessary allocations every time the Hessian is computed,
construct a `ForwardColorHesCache` beforehand:

```julia
@@ -286,7 +286,7 @@ numauto_color_hessian!(H, f, x, hescache)

By default, these methods use a mix of numerical and automatic differentiation,
namely by taking finite differences of gradients calculated via ForwardDiff.jl.
Alternatively, if you have your own custom gradient function `g!`, you can specify
it as an argument to `ForwardColorHesCache`:

```julia
@@ -295,7 +295,6 @@ hescache = ForwardColorHesCache(f, x, colorvec, sparsity, g!)
Note that any user-defined gradient needs to have the signature `g!(G, x)`,
i.e. updating the gradient `G` in place.


### Jacobian-Vector and Hessian-Vector Products

Matrix-free implementations of Jacobian-Vector and Hessian-Vector products are
@@ -316,7 +315,8 @@ auto_jacvec(f, x, v)

# If compute_f0 is false, `cache1` is assumed to already hold `f(x)`; otherwise `f(cache1, x)` is called first
num_jacvec!(dy,f,x,v,cache1 = similar(v),
-cache2 = similar(v);
+cache2 = similar(v),
+cache3 = similar(v);
compute_f0 = true)
num_jacvec(f,x,v,f0=nothing)
```
Expand All @@ -327,14 +327,16 @@ For Hessians, the following are provided:
num_hesvec!(dy,f,x,v,
cache1 = similar(v),
cache2 = similar(v),
-cache3 = similar(v))
+cache3 = similar(v),
+cache4 = similar(v))

num_hesvec(f,x,v)

numauto_hesvec!(dy,f,x,v,
cache = ForwardDiff.GradientConfig(f,v),
cache1 = similar(v),
-cache2 = similar(v))
+cache2 = similar(v),
+cache3 = similar(v))

numauto_hesvec(f,x,v)

@@ -352,6 +354,7 @@ respectively:

```julia
num_hesvecgrad!(dy,g,x,v,
+cache1 = similar(v),
cache2 = similar(v),
cache3 = similar(v))

@@ -378,7 +381,8 @@ using Zygote # Required

numback_hesvec!(dy,f,x,v,
cache1 = similar(v),
-cache2 = similar(v))
+cache2 = similar(v),
+cache3 = similar(v))

numback_hesvec(f,x,v)

@@ -390,7 +394,7 @@ autoback_hesvec!(dy,f,x,v,
autoback_hesvec(f,x,v)
```
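
A short calling sketch for the helpers above (the objective is invented; as noted, the `*back_hesvec` variants require Zygote to be loaded):

```julia
using SparseDiffTools, Zygote

f(x) = sum(abs2, x) / 2 + prod(x)    # hypothetical scalar objective
x = rand(5); v = rand(5); dy = similar(v)

numauto_hesvec!(dy, f, x, v)         # finite difference of ForwardDiff gradients
Hv = autoback_hesvec(f, x, v)        # ForwardDiff-over-Zygote (forward-over-reverse)
```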

-#### J*v and H*v Operators
+#### `J*v` and `H*v` Operators

The following produce matrix-free operators which are used for calculating
Jacobian-vector and Hessian-vector products where the differentiation takes
11 changes: 5 additions & 6 deletions ext/SparseDiffToolsZygoteExt.jl
@@ -45,18 +45,17 @@

### Jac, Hes products

-function numback_hesvec!(dy, f::F, x, v, cache1 = similar(v), cache2 = similar(v)) where {F}
+function numback_hesvec!(dy, f::F, x, v, cache1 = similar(v), cache2 = similar(v), cache3 = similar(v)) where {F}

g = let f = f
(dx, x) -> dx .= first(Zygote.gradient(f, x))
end
T = eltype(x)
# Should it be min? max? mean?
ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x)))
-@. x += ϵ * v
-g(cache1, x)
-@. x -= 2ϵ * v
-g(cache2, x)
-@. x += ϵ * v
+@. cache3 = x + ϵ * v
+g(cache1, cache3)
+@. cache3 = x - ϵ * v
+g(cache2, cache3)

@. dy = (cache1 - cache2) / (2ϵ)
end
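
The non-mutating pattern behind this change, pulled out as a self-contained sketch (a hypothetical helper, not a package API): the perturbed point is written into a scratch buffer, so the user's `x` is never touched, whereas the old `x += ϵv`, `x -= 2ϵv`, `x += ϵv` sequence only restored `x` up to floating-point round-off.

```julia
using LinearAlgebra

# Hypothetical standalone helper mirroring the change above: central difference of an
# in-place gradient g!(G, x) in direction v, without writing to x.
function hesvec_central!(dy, g!, x, v, cache1 = similar(v), cache2 = similar(v),
                         cache3 = similar(v))
    T = eltype(x)
    ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x)))
    @. cache3 = x + ϵ * v       # perturb into the scratch buffer, not into x
    g!(cache1, cache3)
    @. cache3 = x - ϵ * v
    g!(cache2, cache3)
    @. dy = (cache1 - cache2) / (2ϵ)
end

grad!(G, x) = (G .= 2 .* x)        # gradient of sum(abs2, x), for illustration
x = rand(3); v = rand(3); dy = similar(v)
hesvec_central!(dy, grad!, x, v)   # ≈ 2v, with x left unchanged
```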

42 changes: 19 additions & 23 deletions src/differentiation/jaches_products.jl
@@ -32,16 +32,15 @@
vec(partials.(vec(f(y)), 1))
end

-function num_jacvec!(dy, f, x, v, cache1 = similar(v), cache2 = similar(v);
+function num_jacvec!(dy, f, x, v, cache1 = similar(v), cache2 = similar(v), cache3 = similar(v);
compute_f0 = true)
vv = reshape(v, axes(x))
compute_f0 && (f(cache1, x))
T = eltype(x)
# Should it be min? max? mean?
ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x)))
-@. x += ϵ * vv
-f(cache2, x)
-@. x -= ϵ * vv
+@. cache3 = x + ϵ * vv
+f(cache2, cache3)

vecdy = _vec(dy)
veccache1 = _vec(cache1)
veccache2 = _vec(cache2)
@@ -58,19 +57,18 @@
end

function num_hesvec!(dy, f, x, v, cache1 = similar(v), cache2 = similar(v),
-cache3 = similar(v))
+cache3 = similar(v), cache4 = similar(v))
cache = FiniteDiff.GradientCache(v[1], cache1, Val{:central})
g = let f = f, cache = cache
(dx, x) -> FiniteDiff.finite_difference_gradient!(dx, f, x, cache)
end
T = eltype(x)
# Should it be min? max? mean?
ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x)))
-@. x += ϵ * v
-g(cache2, x)
-@. x -= 2ϵ * v
-g(cache3, x)
-@. x += ϵ * v
+@. cache4 = x + ϵ * v
+g(cache2, cache4)
+@. cache4 = x - ϵ * v
+g(cache3, cache4)

@. dy = (cache2 - cache3) / (2ϵ)
end

@@ -87,18 +85,17 @@
end

function numauto_hesvec!(dy, f, x, v, cache = ForwardDiff.GradientConfig(f, v),
-cache1 = similar(v), cache2 = similar(v))
+cache1 = similar(v), cache2 = similar(v), cache3 = similar(v))
g = let f = f, x = x, cache = cache
g = (dx, x) -> ForwardDiff.gradient!(dx, f, x, cache)
end
T = eltype(x)
# Should it be min? max? mean?
ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x)))
-@. x += ϵ * v
-g(cache1, x)
-@. x -= 2ϵ * v
-g(cache2, x)
-@. x += ϵ * v
+@. cache3 = x + ϵ * v
+g(cache1, cache3)
+@. cache3 = x - ϵ * v
+g(cache2, cache3)

@. dy = (cache1 - cache2) / (2ϵ)
end

@@ -137,16 +134,15 @@
partials.(g(Dual{DeivVecTag}.(x, v)), 1)
end

-function num_hesvecgrad!(dy, g, x, v, cache2 = similar(v), cache3 = similar(v))
+function num_hesvecgrad!(dy, g, x, v, cache1 = similar(v), cache2 = similar(v), cache3 = similar(v))

T = eltype(x)
# Should it be min? max? mean?
ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x)))
-@. x += ϵ * v
-g(cache2, x)
-@. x -= 2ϵ * v
-g(cache3, x)
-@. x += ϵ * v
-@. dy = (cache2 - cache3) / (2ϵ)
+@. cache3 = x + ϵ * v
+g(cache1, cache3)
+@. cache3 = x - ϵ * v
+g(cache2, cache3)
+@. dy = (cache1 - cache2) / (2ϵ)

end
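
A small calling sketch for the updated `num_hesvecgrad!` (the gradient and sizes are invented; all three caches default to `similar(v)`):

```julia
using SparseDiffTools

g(dg, x) = (dg .= 2 .* x)          # in-place gradient of sum(abs2, x), for illustration
x = rand(3); v = rand(3); dy = similar(v)

num_hesvecgrad!(dy, g, x, v)       # ≈ 2 .* v, computed without mutating x
```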

function num_hesvecgrad(g, x, v)
16 changes: 9 additions & 7 deletions src/differentiation/vecjac_products.jl
@@ -1,14 +1,15 @@
-function num_vecjac!(du, f::F, x, v, cache1 = similar(v), cache2 = similar(v);
+function num_vecjac!(du, f::F, x, v, cache1 = similar(v), cache2 = similar(v), cache3 = similar(v);
compute_f0 = true) where {F}
compute_f0 && (f(cache1, x))
T = eltype(x)
# Should it be min? max? mean?
ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x)))
vv = reshape(v, size(cache1))
+cache3 .= x

for i in 1:length(x)
-x[i] += ϵ
-f(cache2, x)
-x[i] -= ϵ
+cache3[i] += ϵ
+f(cache2, cache3)
+cache3[i] = x[i]

du[i] = (((cache2 .- cache1) ./ ϵ)' * vv)[1]
end
return du
@@ -21,10 +22,11 @@
# Should it be min? max? mean?
ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x)))
du = similar(x)
+cache = copy(x)

for i in 1:length(x)
-x[i] += ϵ
+cache[i] += ϵ

f0 = f(x)
-x[i] -= ϵ
+cache[i] = x[i]

du[i] = (((f0 .- _f0) ./ ϵ)' * vv)[1]
end
return vec(du)
@@ -91,7 +93,7 @@
end

function _vecjac(f::F, fu, u, autodiff::AutoFiniteDiff) where {F}
-cache = (similar(fu), similar(fu))
+cache = (similar(fu), similar(fu), similar(fu))

pullback = nothing
return AutoDiffVJP(f, u, cache, autodiff, pullback)
end
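
Finally, a usage sketch for the updated `num_vecjac!` (the map and sizes are made up; `cache3` is the new scratch copy of `x`, defaulting to `similar(v)`):

```julia
using SparseDiffTools

f!(du, x) = (du .= x .^ 2)          # hypothetical in-place map
x = rand(4); v = rand(4); du = similar(x)

num_vecjac!(du, f!, x, v)           # v' * J(x), with default caches
vJ = num_vecjac(x -> x .^ 2, x, v)  # out-of-place variant
```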