From 54ce5d50897099a2ad9ed94f8ede4a9f917a8640 Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Fri, 29 Dec 2023 03:16:17 -0500 Subject: [PATCH] Avoid mutation on user x Fixes https://github.com/JuliaDiff/SparseDiffTools.jl/issues/265 --- README.md | 20 +++++++----- docs/src/index.md | 22 ++++++++------ ext/SparseDiffToolsZygoteExt.jl | 11 +++---- src/differentiation/jaches_products.jl | 42 ++++++++++++-------------- src/differentiation/vecjac_products.jl | 16 +++++----- 5 files changed, 58 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index 0e6a43d4..ff36eeaf 100644 --- a/README.md +++ b/README.md @@ -282,7 +282,7 @@ H = numauto_color_hessian(f, x, colorvec, sparsity) numauto_color_hessian!(H, f, x, colorvec, sparsity) ``` -To avoid unnecessary allocations every time the Hessian is computed, +To avoid unnecessary allocations every time the Hessian is computed, construct a `ForwardColorHesCache` beforehand: ```julia @@ -292,7 +292,7 @@ numauto_color_hessian!(H, f, x, hescache) By default, these methods use a mix of numerical and automatic differentiation, namely by taking finite differences of gradients calculated via ForwardDiff.jl. -Alternatively, if you have your own custom gradient function `g!`, you can specify +Alternatively, if you have your own custom gradient function `g!`, you can specify it as an argument to `ForwardColorHesCache`: ```julia @@ -301,7 +301,6 @@ hescache = ForwardColorHesCache(f, x, colorvec, sparsity, g!) Note that any user-defined gradient needs to have the signature `g!(G, x)`, i.e. updating the gradient `G` in place. - ### Jacobian-Vector and Hessian-Vector Products Matrix-free implementations of Jacobian-Vector and Hessian-Vector products is @@ -322,7 +321,8 @@ auto_jacvec(f, x, v) # If compute_f0 is false, then `f(cache1,x)` will be computed num_jacvec!(dy,f,x,v,cache1 = similar(v), - cache2 = similar(v); + cache2 = similar(v), + cache3 = similar(v); compute_f0 = true) num_jacvec(f,x,v,f0=nothing) ``` @@ -333,14 +333,16 @@ For Hessians, the following are provided: num_hesvec!(dy,f,x,v, cache1 = similar(v), cache2 = similar(v), - cache3 = similar(v)) + cache3 = similar(v), + cache4 = similar(v)) num_hesvec(f,x,v) numauto_hesvec!(dy,f,x,v, cache = ForwardDiff.GradientConfig(f,v), cache1 = similar(v), - cache2 = similar(v)) + cache2 = similar(v), + cache3 = similar(v)) numauto_hesvec(f,x,v) @@ -358,6 +360,7 @@ respectively: ```julia num_hesvecgrad!(dy,g,x,v, + cache1 = similar(v), cache2 = similar(v), cache3 = similar(v)) @@ -384,7 +387,8 @@ using Zygote # Required numback_hesvec!(dy,f,x,v, cache1 = similar(v), - cache2 = similar(v)) + cache2 = similar(v), + cache3 = similar(v)) numback_hesvec(f,x,v) @@ -396,7 +400,7 @@ autoback_hesvec!(dy,f,x,v, autoback_hesvec(f,x,v) ``` -#### J*v and H*v Operators +#### `J*v` and `H*v` Operators The following produce matrix-free operators which are used for calculating Jacobian-vector and Hessian-vector products where the differentiation takes diff --git a/docs/src/index.md b/docs/src/index.md index 021d076f..3b52ed8e 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -244,7 +244,7 @@ forwarddiff_color_jacobian!(J::AbstractMatrix{<:Number}, `dx` is a pre-allocated output vector which is used to declare the output size, and thus allows for specifying a non-square Jacobian. -Also, it is possible retrieve the function value via `value(jac_cache)` or +Also, it is possible retrieve the function value via `value(jac_cache)` or `value!(result, jac_cache)` @@ -276,7 +276,7 @@ H = numauto_color_hessian(f, x, colorvec, sparsity) numauto_color_hessian!(H, f, x, colorvec, sparsity) ``` -To avoid unnecessary allocations every time the Hessian is computed, +To avoid unnecessary allocations every time the Hessian is computed, construct a `ForwardColorHesCache` beforehand: ```julia @@ -286,7 +286,7 @@ numauto_color_hessian!(H, f, x, hescache) By default, these methods use a mix of numerical and automatic differentiation, namely by taking finite differences of gradients calculated via ForwardDiff.jl. -Alternatively, if you have your own custom gradient function `g!`, you can specify +Alternatively, if you have your own custom gradient function `g!`, you can specify it as an argument to `ForwardColorHesCache`: ```julia @@ -295,7 +295,6 @@ hescache = ForwardColorHesCache(f, x, colorvec, sparsity, g!) Note that any user-defined gradient needs to have the signature `g!(G, x)`, i.e. updating the gradient `G` in place. - ### Jacobian-Vector and Hessian-Vector Products Matrix-free implementations of Jacobian-Vector and Hessian-Vector products is @@ -316,7 +315,8 @@ auto_jacvec(f, x, v) # If compute_f0 is false, then `f(cache1,x)` will be computed num_jacvec!(dy,f,x,v,cache1 = similar(v), - cache2 = similar(v); + cache2 = similar(v), + cache3 = similar(v); compute_f0 = true) num_jacvec(f,x,v,f0=nothing) ``` @@ -327,14 +327,16 @@ For Hessians, the following are provided: num_hesvec!(dy,f,x,v, cache1 = similar(v), cache2 = similar(v), - cache3 = similar(v)) + cache3 = similar(v), + cache4 = similar(v)) num_hesvec(f,x,v) numauto_hesvec!(dy,f,x,v, cache = ForwardDiff.GradientConfig(f,v), cache1 = similar(v), - cache2 = similar(v)) + cache2 = similar(v), + cache3 = similar(v)) numauto_hesvec(f,x,v) @@ -352,6 +354,7 @@ respectively: ```julia num_hesvecgrad!(dy,g,x,v, + cache1 = similar(v), cache2 = similar(v), cache3 = similar(v)) @@ -378,7 +381,8 @@ using Zygote # Required numback_hesvec!(dy,f,x,v, cache1 = similar(v), - cache2 = similar(v)) + cache2 = similar(v), + cache3 = similar(v)) numback_hesvec(f,x,v) @@ -390,7 +394,7 @@ autoback_hesvec!(dy,f,x,v, autoback_hesvec(f,x,v) ``` -#### J*v and H*v Operators +#### `J*v` and `H*v` Operators The following produce matrix-free operators which are used for calculating Jacobian-vector and Hessian-vector products where the differentiation takes diff --git a/ext/SparseDiffToolsZygoteExt.jl b/ext/SparseDiffToolsZygoteExt.jl index b5c39158..49c3fbf0 100644 --- a/ext/SparseDiffToolsZygoteExt.jl +++ b/ext/SparseDiffToolsZygoteExt.jl @@ -45,18 +45,17 @@ end ### Jac, Hes products -function numback_hesvec!(dy, f::F, x, v, cache1 = similar(v), cache2 = similar(v)) where {F} +function numback_hesvec!(dy, f::F, x, v, cache1 = similar(v), cache2 = similar(v), cache3 = similar(v)) where {F} g = let f = f (dx, x) -> dx .= first(Zygote.gradient(f, x)) end T = eltype(x) # Should it be min? max? mean? ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - @. x += ϵ * v - g(cache1, x) - @. x -= 2ϵ * v - g(cache2, x) - @. x += ϵ * v + @. cache3 = x + ϵ * v + g(cache1, cache3) + @. cache3 = x - ϵ * v + g(cache2, cache3) @. dy = (cache1 - cache2) / (2ϵ) end diff --git a/src/differentiation/jaches_products.jl b/src/differentiation/jaches_products.jl index 3c50d210..604a10ce 100644 --- a/src/differentiation/jaches_products.jl +++ b/src/differentiation/jaches_products.jl @@ -32,16 +32,15 @@ function auto_jacvec(f, x, v) vec(partials.(vec(f(y)), 1)) end -function num_jacvec!(dy, f, x, v, cache1 = similar(v), cache2 = similar(v); +function num_jacvec!(dy, f, x, v, cache1 = similar(v), cache2 = similar(v), cache3 = similar(v); compute_f0 = true) vv = reshape(v, axes(x)) compute_f0 && (f(cache1, x)) T = eltype(x) # Should it be min? max? mean? ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - @. x += ϵ * vv - f(cache2, x) - @. x -= ϵ * vv + @. cache3 = x + ϵ * vv + f(cache2, cache3) vecdy = _vec(dy) veccache1 = _vec(cache1) veccache2 = _vec(cache2) @@ -58,7 +57,7 @@ function num_jacvec(f, x, v, f0 = nothing) end function num_hesvec!(dy, f, x, v, cache1 = similar(v), cache2 = similar(v), - cache3 = similar(v)) + cache3 = similar(v), cache4 = similar(v)) cache = FiniteDiff.GradientCache(v[1], cache1, Val{:central}) g = let f = f, cache = cache (dx, x) -> FiniteDiff.finite_difference_gradient!(dx, f, x, cache) @@ -66,11 +65,10 @@ function num_hesvec!(dy, f, x, v, cache1 = similar(v), cache2 = similar(v), T = eltype(x) # Should it be min? max? mean? ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - @. x += ϵ * v - g(cache2, x) - @. x -= 2ϵ * v - g(cache3, x) - @. x += ϵ * v + @. cache4 = x + ϵ * v + g(cache2, cache4) + @. cache4 = x - ϵ * v + g(cache3, cache4) @. dy = (cache2 - cache3) / (2ϵ) end @@ -87,18 +85,17 @@ function num_hesvec(f, x, v) end function numauto_hesvec!(dy, f, x, v, cache = ForwardDiff.GradientConfig(f, v), - cache1 = similar(v), cache2 = similar(v)) + cache1 = similar(v), cache2 = similar(v), cache3 = similar(v)) g = let f = f, x = x, cache = cache g = (dx, x) -> ForwardDiff.gradient!(dx, f, x, cache) end T = eltype(x) # Should it be min? max? mean? ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - @. x += ϵ * v - g(cache1, x) - @. x -= 2ϵ * v - g(cache2, x) - @. x += ϵ * v + @. cache3 = x + ϵ * v + g(cache1, cache3) + @. cache3 = x - ϵ * v + g(cache2, cache3) @. dy = (cache1 - cache2) / (2ϵ) end @@ -137,16 +134,15 @@ function autonum_hesvec(f, x, v) partials.(g(Dual{DeivVecTag}.(x, v)), 1) end -function num_hesvecgrad!(dy, g, x, v, cache2 = similar(v), cache3 = similar(v)) +function num_hesvecgrad!(dy, g, x, v, cache1 = similar(v), cache2 = similar(v), cache3 = similar(v)) T = eltype(x) # Should it be min? max? mean? ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - @. x += ϵ * v - g(cache2, x) - @. x -= 2ϵ * v - g(cache3, x) - @. x += ϵ * v - @. dy = (cache2 - cache3) / (2ϵ) + @. cache3 = x + ϵ * v + g(cache1, cache3) + @. cache3 = x - ϵ * v + g(cache2, cache3) + @. dy = (cache1 - cache2) / (2ϵ) end function num_hesvecgrad(g, x, v) diff --git a/src/differentiation/vecjac_products.jl b/src/differentiation/vecjac_products.jl index 998c47ac..7f827583 100644 --- a/src/differentiation/vecjac_products.jl +++ b/src/differentiation/vecjac_products.jl @@ -1,14 +1,15 @@ -function num_vecjac!(du, f::F, x, v, cache1 = similar(v), cache2 = similar(v); +function num_vecjac!(du, f::F, x, v, cache1 = similar(v), cache2 = similar(v), cache3 = similar(v); compute_f0 = true) where {F} compute_f0 && (f(cache1, x)) T = eltype(x) # Should it be min? max? mean? ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) vv = reshape(v, size(cache1)) + cache3 .= x for i in 1:length(x) - x[i] += ϵ - f(cache2, x) - x[i] -= ϵ + cache3[i] += ϵ + f(cache2, cache3) + cache3[i] = x[i] du[i] = (((cache2 .- cache1) ./ ϵ)' * vv)[1] end return du @@ -21,10 +22,11 @@ function num_vecjac(f::F, x, v, f0 = nothing) where {F} # Should it be min? max? mean? ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) du = similar(x) + cache = copy(x) for i in 1:length(x) - x[i] += ϵ + cache[i] += ϵ f0 = f(x) - x[i] -= ϵ + cache[i] = x[i] du[i] = (((f0 .- _f0) ./ ϵ)' * vv)[1] end return vec(du) @@ -91,7 +93,7 @@ function VecJac(f, u::AbstractArray, p = nothing, t = nothing; fu = nothing, end function _vecjac(f::F, fu, u, autodiff::AutoFiniteDiff) where {F} - cache = (similar(fu), similar(fu)) + cache = (similar(fu), similar(fu), similar(fu)) pullback = nothing return AutoDiffVJP(f, u, cache, autodiff, pullback) end