From 640770571ec2397e0f68fa8bb4c161102094b6b7 Mon Sep 17 00:00:00 2001 From: Nathanael Bosch Date: Mon, 19 Feb 2024 11:00:32 +0000 Subject: [PATCH] Add the DiagonalEK1 (#301) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add a BlockDiagonal implementation * It works and it's (a little bit) faster than dense! * Implement a first version of the DiagonalEK1 * Added smoothing * Add BlockDiagonals to the tests * This should be the proper logic to choose the cov factorization * We can now select the covariance from the outside! * Add some SIMD here and there * Make views of BlockDiagonals illegal as they are super slow * Change how the diffusions work * Make some more diffusions work * Better checking for validity of algorithm arguments * More BlockDiagonal linalg things * Better handling of the diffusion for prediction * The global diffusion is now written into the cache directly * Implement rmul! or the IsometricKroneckerProduct * Properly ply the new diffusion after the solve * Properly estimate the global scalar diffusion * Properly implement the global MV diffusion * This should be a proper implementation of the dynamic MV diffusion * Try to fix how the prediction handles the diffusion (I failed) * Try to get the DynamicMV diff to work with BlockDiag cov (but fail) * Get the DiagonalEK1 to work with a dense covariance factorization * Check diffusion and factorization compat somewhere else and warn instead of erroring * JuliaFormatter.jl * Implement my own BlockDiag type * JuliaFormatter.jl * Start fixing some tests * Remove duplicate matmul implementation * Fix some failing state init tests * Improve the diffusion handling some more * Enable the EK0 again with priors that are not Kronecker * Remove one test case that's not yet supported * Significantly speed up the secondorderodeproblem tests * Remove a test that currently fails * Fix many of the tests that I had to temporally remove * Make it more obvious that BlockDiagonals and second order ODEs are not working * Rename our BlockDiagonals to ProbNumDiffEqBlockDiagonal (+ shortcut) * Add a BlockDiagonals extension to transfomr ours to theirs * Add unit-tests for our `BlockDiag`s * Check that the K.b is actually empty * Add versions to overload also the non-blasfloat matmuls * Make some code more compact and readable * Change order to fit acronym * For some reson the eval tests failed; so fix them * Make the if else order in predict and backward kernel easier * misc * Properly implement `size` * Remove an inbounds as we don't explicitly do a sizecheck * Remove some checks again as they are irrelevant for the cov * Add a very minimal docstring to ProbNumDiffEqBlockDiagonal * Better BlockDiagonals and a bit of Kronecker * Make the DiagonalEK1 work again (except for secondorderodes) * Test the diffusions (found a bug! unittests are actually nice) * Give the diffusions much more space * Grealy simplify the local error estimate code * Better predict tests * Beter update and smoothing tests * Praise the lord for unittests * JuliaFormatter.jl * Actually git the diffusion tests * Testfix * Remove some comments * Test that the computed log-likelihood is correct * Add BlockDiagonals compat entry * Add more solvers to the autodiff tests * Add much more tests * JuliaFormatter.jl * Better complexity test * Actually do a proper test to check the scaling of the solvers * JuliaFormatter.jl * Make preconditioner computation simpler and test better * One more prior test * Fix some tests * Get the data likelihoods to work with DiagonalEK1 and the EK0 * Make the data likelihoods better * JuliaFormatter.jl * Relax the complexity tests even more * JuliaFormatter.jl * Remove some unused code * Bisschen code upgrade für die data likelihoods * Make the complexity tests more compact * Remove some unused things, mainly to re-trigger gh actions * Fix the bad getindex for BlockDiag * Use the built-in matrix exponential * Remore parts of a test that are not implemented anymore * Improve coverage a bit * JuliaFormatter.jl * Check better what observation noise works with what factorization * Fix the pn_observation_noise check * More BlockDiag tests * Add some more Kronecker tests * JuliaFormatter.jl * Fix test * Make the FixedMVDiffusion work with dense matrices * Make the FixedMVDiffusion work with the data likelihoods * Remove some comments * Add `add!` to the Kronecker tests and shorten them a bit * Fix the failing test that I just found --- Project.toml | 10 +- ext/BlockDiagonalsExt.jl | 8 + src/ProbNumDiffEq.jl | 25 +- src/alg_utils.jl | 18 +- src/algorithms.jl | 151 +++++++++-- src/blockdiagonals.jl | 321 +++++++++++++++++++++++ src/caches.jl | 37 +-- src/callbacks/dataupdate.jl | 61 +++-- src/checks.jl | 2 +- src/covariance_structure.jl | 44 ++-- src/data_likelihoods/fenrir.jl | 20 +- src/derivative_utils.jl | 10 +- src/diffusions.jl | 213 ---------------- src/diffusions/apply_diffusion.jl | 102 ++++++++ src/diffusions/calibration.jl | 178 +++++++++++++ src/diffusions/typedefs.jl | 103 ++++++++ src/fast_linalg.jl | 46 ++-- src/filtering/markov_kernel.jl | 88 ++++++- src/filtering/predict.jl | 38 ++- src/filtering/update.jl | 44 ++++ src/initialization/classicsolverinit.jl | 4 +- src/integrator_utils.jl | 25 +- src/kronecker.jl | 29 +++ src/perform_step.jl | 49 ++-- src/preconditioning.jl | 34 ++- src/priors/iwp.jl | 13 +- src/priors/ltisde.jl | 38 +-- src/projection.jl | 19 ++ test/Project.toml | 2 +- test/autodiff.jl | 83 +++--- test/complexity.jl | 120 ++++----- test/core/blockdiagonals.jl | 85 +++++++ test/core/diffusions.jl | 59 +++++ test/core/fast_linalg.jl | 52 ++++ test/core/filtering.jl | 324 ++++++++++++++---------- test/core/kronecker.jl | 70 ++--- test/core/preconditioning.jl | 23 +- test/core/priors.jl | 74 +++--- test/correctness.jl | 13 + test/data_likelihoods.jl | 54 ++-- test/errors_thrown.jl | 27 ++ test/mass_matrix.jl | 17 ++ test/observation_noise.jl | 28 ++ test/runtests.jl | 12 + test/secondorderode.jl | 14 +- test/state_init.jl | 2 +- test/stiff_problem.jl | 4 + 47 files changed, 1999 insertions(+), 794 deletions(-) create mode 100644 ext/BlockDiagonalsExt.jl create mode 100644 src/blockdiagonals.jl delete mode 100644 src/diffusions.jl create mode 100644 src/diffusions/apply_diffusion.jl create mode 100644 src/diffusions/calibration.jl create mode 100644 src/diffusions/typedefs.jl create mode 100644 test/core/blockdiagonals.jl create mode 100644 test/core/diffusions.jl create mode 100644 test/core/fast_linalg.jl create mode 100644 test/observation_noise.jl diff --git a/Project.toml b/Project.toml index ed5af8bb9..6dc1f5565 100644 --- a/Project.toml +++ b/Project.toml @@ -7,11 +7,8 @@ version = "0.15.0" ArrayAllocators = "c9d4266f-a5cb-439d-837c-c97b191379f5" DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def" -DiffEqDevTools = "f3b72e0c-5b89-59e1-b016-84e28bfd966d" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -ExponentialUtilities = "d4d017d3-3776-5f7e-afef-a10c40355c18" FastBroadcast = "7034ab61-46d4-4ed7-9d0f-46aef9175898" -FastGaussQuadrature = "442a2c76-b920-505d-bb47-c5924d526838" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" FiniteHorizonGramians = "b59a298d-d283-4a37-9369-85a9f9a111a5" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" @@ -30,7 +27,6 @@ RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" SimpleUnPack = "ce78b400-467f-4804-87d8-8f486da07d0a" -SpecialMatrices = "928aab9d-ef52-54ac-8ca1-acd7ca42c160" StaticArrayInterface = "0d7ed370-da01-4f52-bd93-41d350b8b718" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" @@ -39,22 +35,23 @@ TaylorSeries = "6aa5eb33-94cf-58f4-a9d0-e4b2c4fc25ea" ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24" [weakdeps] +BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0" DiffEqDevTools = "f3b72e0c-5b89-59e1-b016-84e28bfd966d" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" [extensions] +BlockDiagonalsExt = "BlockDiagonals" DiffEqDevToolsExt = "DiffEqDevTools" RecipesBaseExt = "RecipesBase" [compat] ArrayAllocators = "0.3" +BlockDiagonals = "0.1" DiffEqBase = "6.122" DiffEqCallbacks = "2.36" DiffEqDevTools = "2" DocStringExtensions = "0.9" -ExponentialUtilities = "1" FastBroadcast = "0.2" -FastGaussQuadrature = "0.5, 1" FillArrays = "1.9" FiniteHorizonGramians = "0.2" ForwardDiff = "0.10" @@ -73,7 +70,6 @@ RecursiveArrayTools = "2, 3" Reexport = "1" SciMLBase = "1.90, 2" SimpleUnPack = "1" -SpecialMatrices = "3" StaticArrayInterface = "1.3" Statistics = "1" StructArrays = "0.4, 0.5, 0.6" diff --git a/ext/BlockDiagonalsExt.jl b/ext/BlockDiagonalsExt.jl new file mode 100644 index 000000000..37a7f6bfc --- /dev/null +++ b/ext/BlockDiagonalsExt.jl @@ -0,0 +1,8 @@ +module BlockDiagonalsExt + +import ProbNumDiffEq: ProbNumDiffEqBlockDiagonal, blocks +import BlockDiagonals: BlockDiagonal + +BlockDiagonal(M::ProbNumDiffEqBlockDiagonal) = BlockDiagonal(blocks(M)) + +end diff --git a/src/ProbNumDiffEq.jl b/src/ProbNumDiffEq.jl index eac2a17cb..1d3756455 100644 --- a/src/ProbNumDiffEq.jl +++ b/src/ProbNumDiffEq.jl @@ -2,7 +2,8 @@ __precompile__() module ProbNumDiffEq -import Base: copy, copy!, show, size, ndims, similar, isapprox, isequal, iterate, ==, length +import Base: + copy, copy!, show, size, ndims, similar, isapprox, isequal, iterate, ==, length, zero using LinearAlgebra import LinearAlgebra: mul! @@ -15,7 +16,7 @@ using Reexport import SciMLBase import SciMLBase: interpret_vars, getsyms, remake using OrdinaryDiffEq -using SpecialMatrices, ToeplitzMatrices +using ToeplitzMatrices using FastBroadcast using StaticArrayInterface using FunctionWrappersWrappers @@ -24,9 +25,7 @@ using TaylorSeries, TaylorIntegration using SimpleUnPack using RecursiveArrayTools using ForwardDiff -using ExponentialUtilities using Octavian -using FastGaussQuadrature import Kronecker using ArrayAllocators using FiniteHorizonGramians @@ -45,15 +44,27 @@ vecvec2mat(x) = reduce(hcat, x)' cov2psdmatrix(cov::Number; d) = PSDMatrix(sqrt(cov) * Eye(d)) cov2psdmatrix(cov::UniformScaling; d) = PSDMatrix(sqrt(cov.λ) * Eye(d)) +cov2psdmatrix(cov::Diagonal{<:Number,<:FillArrays.Fill}; d) = + (@assert size(cov, 1) == size(cov, 2) == d; cov2psdmatrix(cov.diag.value; d)) cov2psdmatrix(cov::Diagonal; d) = (@assert size(cov, 1) == size(cov, 2) == d; PSDMatrix(sqrt.(cov))) cov2psdmatrix(cov::AbstractMatrix; d) = (@assert size(cov, 1) == size(cov, 2) == d; PSDMatrix(Matrix(cholesky(cov).U))) cov2psdmatrix(cov::PSDMatrix; d) = (@assert size(cov, 1) == size(cov, 2) == d; cov) +""" + add!(out, toadd) + +Add `toadd` to `out` in-place. +""" +add! +add!(out, toadd) = (out .+= toadd) + include("fast_linalg.jl") include("kronecker.jl") +include("blockdiagonals.jl") include("covariance_structure.jl") +export IsometricKroneckerCovariance, DenseCovariance, BlockDiagonalCovariance abstract type AbstractODEFilterCache <: OrdinaryDiffEq.OrdinaryDiffEqCache end @@ -65,14 +76,16 @@ include("priors/ltisde.jl") include("priors/ioup.jl") include("priors/matern.jl") export IWP, IOUP, Matern -include("diffusions.jl") +include("diffusions/typedefs.jl") +include("diffusions/apply_diffusion.jl") +include("diffusions/calibration.jl") export FixedDiffusion, DynamicDiffusion, FixedMVDiffusion, DynamicMVDiffusion include("initialization/common.jl") export TaylorModeInit, ClassicSolverInit, SimpleInit, ForwardDiffInit include("algorithms.jl") -export EK0, EK1 +export EK0, EK1, DiagonalEK1 export ExpEK, RosenbrockExpEK include("alg_utils.jl") diff --git a/src/alg_utils.jl b/src/alg_utils.jl index a3c84736f..4fbbddd1b 100644 --- a/src/alg_utils.jl +++ b/src/alg_utils.jl @@ -4,19 +4,25 @@ ############################################################################################ OrdinaryDiffEq._alg_autodiff(::AbstractEK) = Val{true}() -OrdinaryDiffEq._alg_autodiff(::EK1{CS,AD}) where {CS,AD} = Val{AD}() -OrdinaryDiffEq.alg_difftype(::EK1{CS,AD,DiffType}) where {CS,AD,DiffType} = DiffType OrdinaryDiffEq.standardtag(::AbstractEK) = false -OrdinaryDiffEq.standardtag(::EK1{CS,AD,DiffType,ST}) where {CS,AD,DiffType,ST} = ST OrdinaryDiffEq.concrete_jac(::AbstractEK) = nothing -OrdinaryDiffEq.concrete_jac(::EK1{CS,AD,DiffType,ST,CJ}) where {CS,AD,DiffType,ST,CJ} = CJ @inline DiffEqBase.get_tmp_cache(integ, alg::AbstractEK, cache::AbstractODEFilterCache) = (cache.tmp, cache.atmp) -OrdinaryDiffEq.get_chunksize(::EK1{CS}) where {CS} = Val(CS) OrdinaryDiffEq.isfsal(::AbstractEK) = false -OrdinaryDiffEq.isimplicit(::EK1) = true +for ALG in [:EK1, :DiagonalEK1] + @eval OrdinaryDiffEq._alg_autodiff(::$ALG{CS,AD}) where {CS,AD} = Val{AD}() + @eval OrdinaryDiffEq.alg_difftype(::$ALG{CS,AD,DiffType}) where {CS,AD,DiffType} = + DiffType + @eval OrdinaryDiffEq.standardtag(::$ALG{CS,AD,DiffType,ST}) where {CS,AD,DiffType,ST} = + ST + @eval OrdinaryDiffEq.concrete_jac( + ::$ALG{CS,AD,DiffType,ST,CJ}, + ) where {CS,AD,DiffType,ST,CJ} = CJ + @eval OrdinaryDiffEq.get_chunksize(::$ALG{CS}) where {CS} = Val(CS) + @eval OrdinaryDiffEq.isimplicit(::$ALG) = true +end ############################################ # Step size control diff --git a/src/algorithms.jl b/src/algorithms.jl index 9982f0f5d..ba535e334 100644 --- a/src/algorithms.jl +++ b/src/algorithms.jl @@ -3,7 +3,13 @@ ######################################################################################## abstract type AbstractEK <: OrdinaryDiffEq.OrdinaryDiffEqAdaptiveAlgorithm end -function ekargcheck(alg; diffusionmodel, pn_observation_noise, kwargs...) +function ekargcheck( + alg; + diffusionmodel, + pn_observation_noise, + covariance_factorization, + kwargs..., +) if (isstatic(diffusionmodel) && diffusionmodel.calibrate) && (!isnothing(pn_observation_noise) && !iszero(pn_observation_noise)) throw( @@ -12,14 +18,63 @@ function ekargcheck(alg; diffusionmodel, pn_observation_noise, kwargs...) ), ) end - if ( - (diffusionmodel isa FixedMVDiffusion && diffusionmodel.calibrate) || - diffusionmodel isa DynamicMVDiffusion) && alg == EK1 - throw( - ArgumentError( - "The `EK1` algorithm does not support automatic calibration of multivariate diffusion models. Either use the `EK0` instead, or use a scalar diffusion model, or set `calibrate=false` and calibrate manually by optimizing `sol.pnstats.log_likelihood`.", - ), - ) + if alg == EK1 + if diffusionmodel isa FixedMVDiffusion && diffusionmodel.calibrate + throw( + ArgumentError( + "The `EK1` algorithm does not support automatic global calibration of multivariate diffusion models. Either use a scalar diffusion model, or set `calibrate=false` and calibrate manually by optimizing `sol.pnstats.log_likelihood`. Or use a different solve, like `EK0` or `DiagonalEK1`.", + ), + ) + elseif diffusionmodel isa DynamicMVDiffusion + throw( + ArgumentError( + "The `EK1` algorithm does not support automatic calibration of local multivariate diffusion models. Either use a scalar diffusion model, or use a different solve, like `EK0` or `DiagonalEK1`.", + ), + ) + end + end + if !(isnothing(pn_observation_noise) || ismissing(pn_observation_noise)) + if covariance_factorization == IsometricKroneckerCovariance && !( + pn_observation_noise isa Number + || pn_observation_noise isa UniformScaling + || pn_observation_noise isa Diagonal{<:Number,<:FillArrays.Fill}) + throw( + ArgumentError( + "The supplied `pn_observation_noise` is not compatible with the chosen `IsometricKroneckerCovariance` factorization. Try one of `BlockDiagonalCovariance` or `DenseCovariance` instead!", + ), + ) + end + if covariance_factorization == BlockDiagonalCovariance && !( + pn_observation_noise isa Number + || pn_observation_noise isa UniformScaling + || pn_observation_noise isa Diagonal) + throw( + ArgumentError( + "The supplied `pn_observation_noise` is not compatible with the chosen `BlockDiagonalCovariance` factorization. Try `DenseCovariance` instead!", + ), + ) + end + end +end + +function covariance_structure(::Type{Alg}, prior, diffusionmodel) where {Alg<:AbstractEK} + if Alg <: EK0 + if prior isa IWP + if (diffusionmodel isa DynamicDiffusion || diffusionmodel isa FixedDiffusion) + return IsometricKroneckerCovariance + else + return BlockDiagonalCovariance + end + else + # This is not great as other priors can be Kronecker too; TODO + return DenseCovariance + end + elseif Alg <: DiagonalEK1 + return BlockDiagonalCovariance + elseif Alg <: EK1 + return DenseCovariance + else + throw(ArgumentError("Unknown algorithm type $Alg")) end end @@ -58,22 +113,25 @@ julia> solve(prob, EK0()) # [References](@ref references) """ -struct EK0{PT,DT,IT,RT} <: AbstractEK +struct EK0{PT,DT,IT,RT,CF} <: AbstractEK prior::PT diffusionmodel::DT smooth::Bool initialization::IT pn_observation_noise::RT + covariance_factorization::CF EK0(; order=3, prior::PT=IWP(order), diffusionmodel::DT=DynamicDiffusion(), smooth=true, initialization::IT=TaylorModeInit(num_derivatives(prior)), pn_observation_noise::RT=nothing, - ) where {PT,DT,IT,RT} = begin - ekargcheck(EK0; diffusionmodel, pn_observation_noise) - new{PT,DT,IT,RT}( - prior, diffusionmodel, smooth, initialization, pn_observation_noise) + covariance_factorization::CF=covariance_structure(EK0, prior, diffusionmodel), + ) where {PT,DT,IT,RT,CF} = begin + ekargcheck(EK0; diffusionmodel, pn_observation_noise, covariance_factorization) + new{PT,DT,IT,RT,CF}( + prior, diffusionmodel, smooth, initialization, pn_observation_noise, + covariance_factorization) end end @@ -117,12 +175,13 @@ julia> solve(prob, EK1()) # [References](@ref references) """ -struct EK1{CS,AD,DiffType,ST,CJ,PT,DT,IT,RT} <: AbstractEK +struct EK1{CS,AD,DiffType,ST,CJ,PT,DT,IT,RT,CF} <: AbstractEK prior::PT diffusionmodel::DT smooth::Bool initialization::IT pn_observation_noise::RT + covariance_factorization::CF EK1(; order=3, prior::PT=IWP(order), @@ -135,8 +194,57 @@ struct EK1{CS,AD,DiffType,ST,CJ,PT,DT,IT,RT} <: AbstractEK standardtag=Val{true}(), concrete_jac=nothing, pn_observation_noise::RT=nothing, - ) where {PT,DT,IT,RT} = begin - ekargcheck(EK1; diffusionmodel, pn_observation_noise) + covariance_factorization::CF=covariance_structure(EK1, prior, diffusionmodel), + ) where {PT,DT,IT,RT,CF} = begin + ekargcheck(EK1; diffusionmodel, pn_observation_noise, covariance_factorization) + new{ + _unwrap_val(chunk_size), + _unwrap_val(autodiff), + diff_type, + _unwrap_val(standardtag), + _unwrap_val(concrete_jac), + PT, + DT, + IT, + RT, + CF, + }( + prior, + diffusionmodel, + smooth, + initialization, + pn_observation_noise, + covariance_factorization, + ) + end +end + +struct DiagonalEK1{CS,AD,DiffType,ST,CJ,PT,DT,IT,RT,CF} <: AbstractEK + prior::PT + diffusionmodel::DT + smooth::Bool + initialization::IT + pn_observation_noise::RT + covariance_factorization::CF + DiagonalEK1(; + order=3, + prior::PT=IWP(order), + diffusionmodel::DT=DynamicDiffusion(), + smooth=true, + initialization::IT=TaylorModeInit(num_derivatives(prior)), + chunk_size=Val{0}(), + autodiff=Val{true}(), + diff_type=Val{:forward}, + standardtag=Val{true}(), + concrete_jac=nothing, + pn_observation_noise::RT=nothing, + covariance_factorization::CF=covariance_structure( + DiagonalEK1, + prior, + diffusionmodel, + ), + ) where {PT,DT,IT,RT,CF} = begin + ekargcheck(DiagonalEK1; diffusionmodel, pn_observation_noise, covariance_factorization) new{ _unwrap_val(chunk_size), _unwrap_val(autodiff), @@ -147,12 +255,14 @@ struct EK1{CS,AD,DiffType,ST,CJ,PT,DT,IT,RT} <: AbstractEK DT, IT, RT, + CF, }( prior, diffusionmodel, smooth, initialization, pn_observation_noise, + covariance_factorization, ) end end @@ -236,7 +346,12 @@ function DiffEqBase.remake(thing::EK1{CS,AD,DT,ST,CJ}; kwargs...) where {CS,AD,D ) end -function DiffEqBase.prepare_alg(alg::EK1{0}, u0::AbstractArray{T}, p, prob) where {T} +function DiffEqBase.prepare_alg( + alg::Union{EK1{0},DiagonalEK1{0}}, + u0::AbstractArray{T}, + p, + prob, +) where {T} # See OrdinaryDiffEq.jl: ./src/alg_utils.jl (where this is copied from). # In the future we might want to make EK1 an OrdinaryDiffEqAdaptiveImplicitAlgorithm and # use the prepare_alg from OrdinaryDiffEq; but right now, we do not use `linsolve` which diff --git a/src/blockdiagonals.jl b/src/blockdiagonals.jl new file mode 100644 index 000000000..6ec085a01 --- /dev/null +++ b/src/blockdiagonals.jl @@ -0,0 +1,321 @@ +""" + ProbNumDiffEqBlockDiagonal(blocks::Vector{V}) where {T,V<:AbstractMatrix{T}} + +A very minimal but fast re-implementation of `BlockDiagonals.Blockdiagonal`. +""" +struct ProbNumDiffEqBlockDiagonal{T<:Number,V<:AbstractMatrix{T}} <: AbstractMatrix{T} + blocks::Vector{V} + function ProbNumDiffEqBlockDiagonal{T,V}( + blocks::Vector{V}, + ) where {T,V<:AbstractMatrix{T}} + return new{T,V}(blocks) + end +end +function ProbNumDiffEqBlockDiagonal(blocks::Vector{V}) where {T,V<:AbstractMatrix{T}} + return ProbNumDiffEqBlockDiagonal{T,V}(blocks) +end +const BlockDiag = ProbNumDiffEqBlockDiagonal + +blocks(B::BlockDiag) = B.blocks +nblocks(B::BlockDiag) = length(B.blocks) +size(B::BlockDiag) = mapreduce(size, ((a, b), (c, d)) -> (a + c, b + d), blocks(B)) + +Base.@propagate_inbounds function Base.getindex( + B::BlockDiag{T}, + i::Integer, + j::Integer, +) where {T} + all((0, 0) .< (i, j) .<= size(B)) || throw(BoundsError(B, (i, j))) + + p = 1 + Si, Sj = size(blocks(B)[p]) + while p <= nblocks(B) + if i <= Si && j <= Sj + return blocks(B)[p][i, j] + elseif (i <= Si && j > Sj) || (j <= Sj && i > Si) + return zero(T) + else + i -= Si + j -= Sj + p += 1 + end + end + error("This shouldn't happen") +end + +Base.view(::BlockDiag, idxs...) = + throw(ErrorException("`BlockDiag` does not support views!")) + +copy(B::BlockDiag) = BlockDiag(copy.(blocks(B))) +copy!(B::BlockDiag, A::BlockDiag) = begin + @assert length(A.blocks) == length(B.blocks) + @simd ivdep for i in eachindex(blocks(B)) + copy!(B.blocks[i], A.blocks[i]) + end + return B +end +similar(B::BlockDiag) = BlockDiag(similar.(blocks(B))) +zero(B::BlockDiag) = BlockDiag(zero.(blocks(B))) + +# Sums of BlockDiag +Base.:+(A::BlockDiag, B::BlockDiag) = begin + @assert nblocks(A) == nblocks(B) + return BlockDiag([Ai + Bi for (Ai, Bi) in zip(blocks(A), blocks(B))]) +end +Base.:-(A::BlockDiag, B::BlockDiag) = begin + @assert nblocks(A) == nblocks(B) + return BlockDiag([Ai - Bi for (Ai, Bi) in zip(blocks(A), blocks(B))]) +end + +add!(out::BlockDiag, toadd::BlockDiag) = begin + @assert nblocks(out) == nblocks(toadd) + @simd ivdep for i in eachindex(blocks(out)) + add!(blocks(out)[i], blocks(toadd)[i]) + end + return out +end + +# Mul with Scalar or UniformScaling +Base.:*(a::Number, M::BlockDiag) = BlockDiag([a * B for B in blocks(M)]) +Base.:*(M::BlockDiag, a::Number) = BlockDiag([B * a for B in blocks(M)]) +Base.:*(U::UniformScaling, M::BlockDiag) = BlockDiag([U * B for B in blocks(M)]) +Base.:*(M::BlockDiag, U::UniformScaling) = BlockDiag([B * U for B in blocks(M)]) + +# Mul between BockDiag's +Base.:*(A::BlockDiag, B::BlockDiag) = begin + @assert length(A.blocks) == length(B.blocks) + return BlockDiag([Ai * Bi for (Ai, Bi) in zip(blocks(A), blocks(B))]) +end +Base.:*(A::Adjoint{T,<:BlockDiag}, B::BlockDiag) where {T} = begin + @assert length(A.parent.blocks) == length(B.blocks) + return BlockDiag([Ai' * Bi for (Ai, Bi) in zip(blocks(A.parent), blocks(B))]) +end +Base.:*(A::BlockDiag, B::Adjoint{T,<:BlockDiag}) where {T} = begin + @assert length(A.blocks) == length(B.parent.blocks) + return BlockDiag([Ai * Bi' for (Ai, Bi) in zip(blocks(A), blocks(B.parent))]) +end + +# Standard LinearAlgebra.mul! +for _mul! in (:mul!, :_matmul!) + @eval $_mul!(C::BlockDiag, A::BlockDiag, B::BlockDiag) = begin + @assert length(C.blocks) == length(A.blocks) == length(B.blocks) + @simd ivdep for i in eachindex(blocks(C)) + @inbounds $_mul!(C.blocks[i], A.blocks[i], B.blocks[i]) + end + return C + end + (_mul! == :_matmul!) && @eval $_mul!( + C::BlockDiag{T}, + A::BlockDiag{T}, + B::BlockDiag{T}, + ) where {T<:LinearAlgebra.BlasFloat} = begin + @assert length(C.blocks) == length(A.blocks) == length(B.blocks) + @simd ivdep for i in eachindex(blocks(C)) + @inbounds $_mul!(C.blocks[i], A.blocks[i], B.blocks[i]) + end + return C + end + + @eval $_mul!(C::BlockDiag, A::BlockDiag, B::BlockDiag, alpha::Number, beta::Number) = + begin + @assert length(C.blocks) == length(A.blocks) == length(B.blocks) + @simd ivdep for i in eachindex(blocks(C)) + @inbounds $_mul!(C.blocks[i], A.blocks[i], B.blocks[i], alpha, beta) + end + return C + end + (_mul! == :_matmul!) && @eval $_mul!( + C::BlockDiag{T}, + A::BlockDiag{T}, + B::BlockDiag{T}, + alpha::Number, + beta::Number, + ) where {T<:LinearAlgebra.BlasFloat} = begin + @assert length(C.blocks) == length(A.blocks) == length(B.blocks) + @simd ivdep for i in eachindex(blocks(C)) + @inbounds $_mul!(C.blocks[i], A.blocks[i], B.blocks[i], alpha, beta) + end + return C + end + + @eval $_mul!(C::BlockDiag, A::Adjoint{<:Number,<:BlockDiag}, B::BlockDiag) = begin + @assert length(C.blocks) == length(A.parent.blocks) == length(B.blocks) + @simd ivdep for i in eachindex(blocks(C)) + @inbounds $_mul!(C.blocks[i], adjoint(A.parent.blocks[i]), B.blocks[i]) + end + return C + end + (_mul! == :_matmul!) && @eval $_mul!( + C::BlockDiag{T}, + A::BlockDiag{T}, + B::Adjoint{T,<:BlockDiag{T}}, + ) where {T<:LinearAlgebra.BlasFloat} = begin + @assert length(C.blocks) == length(A.blocks) == length(B.parent.blocks) + @simd ivdep for i in eachindex(blocks(C)) + @inbounds $_mul!(C.blocks[i], A.blocks[i], adjoint(B.parent.blocks[i])) + end + return C + end + + @eval $_mul!(C::BlockDiag, A::BlockDiag, B::Adjoint{<:Number,<:BlockDiag}) = begin + @assert length(C.blocks) == length(A.blocks) == length(B.parent.blocks) + @simd ivdep for i in eachindex(blocks(C)) + @inbounds $_mul!(C.blocks[i], A.blocks[i], adjoint(B.parent.blocks[i])) + end + return C + end + (_mul! == :_matmul!) && @eval $_mul!( + C::BlockDiag{T}, + A::Adjoint{T,<:BlockDiag{T}}, + B::BlockDiag{T}, + ) where {T<:LinearAlgebra.BlasFloat} = begin + @assert length(C.blocks) == length(A.parent.blocks) == length(B.blocks) + @simd ivdep for i in eachindex(blocks(C)) + @inbounds $_mul!(C.blocks[i], adjoint(A.parent.blocks[i]), B.blocks[i]) + end + return C + end + + @eval $_mul!(C::BlockDiag, A::Number, B::BlockDiag) = begin + @assert length(C.blocks) == length(B.blocks) + @simd ivdep for i in eachindex(blocks(C)) + @inbounds $_mul!(C.blocks[i], A, B.blocks[i]) + end + return C + end + @eval $_mul!(C::BlockDiag, A::BlockDiag, B::Number) = begin + @assert length(C.blocks) == length(A.blocks) + @simd ivdep for i in eachindex(blocks(C)) + @inbounds $_mul!(C.blocks[i], A.blocks[i], B) + end + return C + end + + @eval $_mul!( + C::AbstractVector, + A::BlockDiag, + B::AbstractVector, + ) = begin + @assert size(A, 2) == length(B) + @assert length(C) == size(A, 1) + ic, ib = 1, 1 + for i in eachindex(blocks(A)) + d1, d2 = size(A.blocks[i]) + @inbounds $_mul!(view(C, ic:(ic+d1-1)), A.blocks[i], view(B, ib:(ib+d2-1))) + ic += d1 + ib += d2 + end + return C + end + (_mul! == :_matmul!) && @eval $_mul!( + C::AbstractVector{T}, + A::BlockDiag{T}, + B::AbstractVector{T}, + ) where {T<:LinearAlgebra.BlasFloat} = begin + @assert size(A, 2) == length(B) + @assert length(C) == size(A, 1) + ic, ib = 1, 1 + for i in eachindex(blocks(A)) + d1, d2 = size(A.blocks[i]) + @inbounds $_mul!(view(C, ic:(ic+d1-1)), A.blocks[i], view(B, ib:(ib+d2-1))) + ic += d1 + ib += d2 + end + return C + end +end + +LinearAlgebra.rmul!(B::BlockDiag, n::Number) = begin + @simd ivdep for i in eachindex(B.blocks) + rmul!(B.blocks[i], n) + end + return B +end + +LinearAlgebra.inv(A::BlockDiag) = BlockDiag(inv.(blocks(A))) + +copy!(A::BlockDiag, B::Diagonal) = begin + @assert size(A) == size(B) + i = 1 + for Ai in blocks(A) + d = LinearAlgebra.checksquare(Ai) + @views copy!(Ai, Diagonal(B.diag[i:i+d-1])) + i += d + end + return A +end + +Base.:*(D::Diagonal, A::BlockDiag) = begin + @assert size(D, 2) == size(A, 1) + local i = 1 + outblocks = map(blocks(A)) do Ai + d = size(Ai, 1) + outi = Diagonal(view(D.diag, i:(i+d-1))) * Ai + i += d + outi + end + return BlockDiag(outblocks) +end +Base.:*(A::BlockDiag, D::Diagonal) = begin + local i = 1 + outblocks = map(blocks(A)) do Ai + d = size(Ai, 2) + outi = Ai * Diagonal(view(D.diag, i:(i+d-1))) + i += d + outi + end + return BlockDiag(outblocks) +end +for _mul! in (:mul!, :_matmul!) + @eval $_mul!(C::BlockDiag, A::BlockDiag, B::Diagonal) = begin + local i = 1 + @assert nblocks(C) == nblocks(A) + for j in eachindex(blocks(C)) + Ci, Ai = blocks(C)[j], blocks(A)[j] + d = size(Ai, 2) + $_mul!(Ci, Ai, Diagonal(view(B.diag, i:(i+d-1)))) + i += d + end + return C + end + @eval $_mul!(C::BlockDiag, A::Diagonal, B::BlockDiag) = begin + local i = 1 + @assert nblocks(C) == nblocks(B) + for j in eachindex(blocks(C)) + Ci, Bi = blocks(C)[j], blocks(B)[j] + d = size(Bi, 1) + $_mul!(Ci, Diagonal(view(A.diag, i:(i+d-1))), Bi) + i += d + end + return C + end + @eval $_mul!(C::BlockDiag, A::BlockDiag, B::Diagonal, alpha::Number, beta::Number) = + begin + local i = 1 + @assert nblocks(C) == nblocks(A) + for j in eachindex(blocks(C)) + Ci, Ai = blocks(C)[j], blocks(A)[j] + d = size(Ai, 2) + $_mul!(Ci, Ai, Diagonal(view(B.diag, i:(i+d-1))), alpha, beta) + i += d + end + return C + end + @eval $_mul!(C::BlockDiag, A::Diagonal, B::BlockDiag, alpha::Number, beta::Number) = + begin + i = 1 + @assert nblocks(C) == nblocks(B) + for j in eachindex(blocks(C)) + Ci, Bi = blocks(C)[j], blocks(B)[j] + d = size(Bi, 1) + @inbounds $_mul!(Ci, Diagonal(view(A.diag, i:(i+d-1))), Bi, alpha, beta) + i += d + end + return C + end +end + +Base.isequal(A::BlockDiag, B::BlockDiag) = + length(A.blocks) == length(B.blocks) && all(map(isequal, A.blocks, B.blocks)) +==(A::BlockDiag, B::BlockDiag) = + length(A.blocks) == length(B.blocks) && all(map(==, A.blocks, B.blocks)) diff --git a/src/caches.jl b/src/caches.jl index da8e191cd..ab28e0657 100644 --- a/src/caches.jl +++ b/src/caches.jl @@ -5,7 +5,7 @@ mutable struct EKCache{ RType,CFacType,ProjType,SolProjType,PType,PIType,EType,uType,duType,xType,PriorType, AType,QType, FType,LType,FHGMethodType,FHGCacheType, - HType,vecType,matType,bkType,diffusionType,diffModelType,measModType,measType, + HType,vecType,dduType,matType,bkType,diffusionType,diffModelType,measModType,measType, puType,llType,dtType,rateType,UF,JC,uNoUnitsType, } <: AbstractODEFilterCache # Constants @@ -49,7 +49,7 @@ mutable struct EKCache{ pu_tmp::puType H::HType du::duType - ddu::matType + ddu::dduType K1::matType G1::matType Smat::HType @@ -103,7 +103,7 @@ function OrdinaryDiffEq.alg_cache( # uElType = eltype(u_vec) uElType = uBottomEltypeNoUnits - FAC = get_covariance_structure(alg; elType=uElType, d, q) + FAC = alg.covariance_factorization{uElType}(d, q) if FAC isa IsometricKroneckerCovariance && !(f.mass_matrix isa UniformScaling) throw( ArgumentError( @@ -165,7 +165,7 @@ function OrdinaryDiffEq.alg_cache( # Diffusion Model diffmodel = alg.diffusionmodel initdiff = initial_diffusion(diffmodel, d, q, uEltypeNoUnits) - copy!(x0.Σ, apply_diffusion(x0.Σ, initdiff)) + apply_diffusion!(x0.Σ, initdiff) # Measurement model related things R = @@ -178,22 +178,23 @@ function OrdinaryDiffEq.alg_cache( # Caches du = is_secondorder_ode ? similar(u.x[2]) : similar(u) - ddu = factorized_similar(FAC, length(u), length(u)) + ddu = + !isnothing(f.jac_prototype) ? + f.jac_prototype : zeros(uElType, length(u), length(u)) _d = is_secondorder_ode ? 2d : d - pu_tmp = Gaussian( - similar(Array{uElType}, _d), - PSDMatrix( - if FAC isa IsometricKroneckerCovariance - if is_secondorder_ode + pu_tmp = if is_secondorder_ode + Gaussian(similar(Array{uElType}, 2d), + PSDMatrix( + if FAC isa IsometricKroneckerCovariance Kronecker.kronecker(similar(Matrix{uElType}, D ÷ d, _d ÷ d), I(d)) + elseif FAC isa BlockDiagonalCovariance + error("I have no idea") else - factorized_similar(FAC, D, d) - end - else - similar(Matrix{uElType}, D, _d) - end, - ), - ) + similar(Matrix{uElType}, D, _d) + end)) + else + Gaussian(similar(Array{uElType}, d), PSDMatrix(factorized_similar(FAC, D, d))) + end K = factorized_similar(FAC, D, d) G = factorized_similar(FAC, D, D) @@ -240,7 +241,7 @@ function OrdinaryDiffEq.alg_cache( typeof(R),typeof(FAC),typeof(Proj),typeof(SolProj),typeof(P),typeof(PI),typeof(E0), uType,typeof(du),typeof(x0),typeof(prior),typeof(A),typeof(Q), typeof(F),typeof(L),typeof(FHG_method),typeof(FHG_cache), - typeof(H),typeof(C_d),matType, + typeof(H),typeof(C_d),typeof(ddu),matType, typeof(backward_kernel),typeof(initdiff), typeof(diffmodel),typeof(measurement_model),typeof(measurement),typeof(pu_tmp), uEltypeNoUnits,typeof(dt),typeof(du1),typeof(uf),typeof(jac_config),typeof(atmp), diff --git a/src/callbacks/dataupdate.jl b/src/callbacks/dataupdate.jl index c14223cab..def5759ac 100644 --- a/src/callbacks/dataupdate.jl +++ b/src/callbacks/dataupdate.jl @@ -50,33 +50,30 @@ function DataUpdateCallback( val = values[idx] o = length(val) + d = integ.cache.d @unpack x, E0, m_tmp, G1 = integ.cache - H = view(G1, 1:o, :) - if observation_matrix === I - @.. H = E0 - elseif observation_matrix isa UniformScaling - @.. H = observation_matrix.λ * E0 - else - matmul!(H, observation_matrix, E0) - end + M = observation_matrix + H = M * E0 obs_mean = _matmul!(view(m_tmp.μ, 1:o), H, x.μ) obs_mean .-= val R = cov2psdmatrix(observation_noise_cov; d=o) + R = to_factorized_matrix(integ.cache.covariance_factorization, R) # _A = x.Σ.R * H' # obs_cov = _A'_A + R - obs_cov = PSDMatrix(qr!([x.Σ.R * H'; R.R]).R) + obs_cov = PSDMatrix(make_obscov_sqrt(x.Σ.R, H, R.R)) + obs = Gaussian(obs_mean, obs_cov) - @unpack x_tmp, K1, C_DxD, C_dxd, C_Dxd, C_d = integ.cache - K1 = view(K1, :, 1:o) - C_dxd = view(C_dxd, 1:o, 1:o) - C_Dxd = view(C_Dxd, :, 1:o) - C_d = view(C_d, 1:o) - _x = copy!(x_tmp, x) + if o != d && !(integ.alg isa EK1) + error("Partial observations only work with the EK1 right now") + end + _cache = make_obssized_cache(integ.cache; o) + @unpack K1, C_DxD, C_dxd, C_Dxd, C_d = _cache + _x = copy!(integ.cache.x_tmp, x) _, ll = update!(x, _x, obs, H, K1, C_Dxd, C_DxD, C_dxd, C_d; R=R) if !isnothing(loglikelihood) @@ -85,3 +82,37 @@ function DataUpdateCallback( end return PresetTimeCallback(data.t, affect!; save_positions, kwargs...) end + +make_obscov_sqrt(PR::AbstractMatrix, H::AbstractMatrix, RR::AbstractMatrix) = + qr!([PR * H'; RR]).R +make_obscov_sqrt( + PR::IsometricKroneckerProduct, + H::IsometricKroneckerProduct, + RR::IsometricKroneckerProduct, +) = + IsometricKroneckerProduct(PR.ldim, make_obscov_sqrt(PR.B, H.B, RR.B)) +make_obscov_sqrt(PR::BlockDiag, H::BlockDiag, RR::BlockDiag) = + BlockDiag([ + make_obscov_sqrt(blocks(PR)[i], blocks(H)[i], blocks(RR)[i]) for + i in eachindex(blocks(PR)) + ]) + +function make_obssized_cache(cache; o) + if o == cache.d + return cache + else + return make_obssized_cache(cache.covariance_factorization, cache; o) + end +end +function make_obssized_cache(::DenseCovariance, cache; o) + @unpack K1, C_DxD, C_dxd, C_Dxd, C_d, m_tmp, x_tmp = cache + return ( + K1=view(K1, :, 1:o), + C_dxd=view(C_dxd, 1:o, 1:o), + C_Dxd=view(C_Dxd, :, 1:o), + C_d=view(C_d, 1:o), + C_DxD=C_DxD, + m_tmp=Gaussian(view(m_tmp.μ, 1:o), view(m_tmp.Σ, 1:o, 1:o)), + x_tmp=x_tmp, + ) +end diff --git a/src/checks.jl b/src/checks.jl index 6fb213b08..819ebe454 100644 --- a/src/checks.jl +++ b/src/checks.jl @@ -16,7 +16,7 @@ function check_densesmooth(integ) error("To use `dense=true` you need to set `smooth=true`!") end if !integ.opts.save_everystep && integ.alg.smooth - error("If you do not save all values, you do not need to smooth!") + error("If you set `save_everystep=false` also set `smooth=false` in the alg!") end end function check_saveiter(integ) diff --git a/src/covariance_structure.jl b/src/covariance_structure.jl index 7390b5124..2809a3e42 100644 --- a/src/covariance_structure.jl +++ b/src/covariance_structure.jl @@ -7,20 +7,9 @@ struct DenseCovariance{T} <: CovarianceStructure{T} d::Int64 q::Int64 end - -function get_covariance_structure(alg; elType, d, q) - if ( - alg isa EK0 && - !( - alg.diffusionmodel isa DynamicMVDiffusion || - alg.diffusionmodel isa FixedMVDiffusion - ) && - alg.prior isa IWP - ) - return IsometricKroneckerCovariance{elType}(d, q) - else - return DenseCovariance{elType}(d, q) - end +struct BlockDiagonalCovariance{T} <: CovarianceStructure{T} + d::Int64 + q::Int64 end factorized_zeros(C::IsometricKroneckerCovariance{T}, sizes...) where {T} = begin @@ -41,9 +30,34 @@ factorized_zeros(::DenseCovariance{T}, sizes...) where {T} = factorized_similar(::DenseCovariance{T}, size1, size2) where {T} = similar(Matrix{T}, size1, size2) +factorized_zeros(C::BlockDiagonalCovariance{T}, sizes...) where {T} = begin + for s in sizes + @assert s % C.d == 0 + end + return BlockDiag([Array{T}(calloc, (s ÷ C.d for s in sizes)...) for _ in 1:C.d]) +end +factorized_similar(C::BlockDiagonalCovariance{T}, size1, size2) where {T} = begin + for s in (size1, size2) + @assert s % C.d == 0 + end + return BlockDiag([similar(Matrix{T}, size1 ÷ C.d, size2 ÷ C.d) for _ in 1:C.d]) +end + to_factorized_matrix(::DenseCovariance, M::AbstractMatrix) = Matrix(M) to_factorized_matrix(::IsometricKroneckerCovariance, M::IsometricKroneckerProduct) = M -for FT in [:DenseCovariance, :IsometricKroneckerCovariance] +to_factorized_matrix(C::BlockDiagonalCovariance, M::IsometricKroneckerProduct) = + BlockDiag([copy(M.B) for _ in 1:C.d]) +to_factorized_matrix(C::BlockDiagonalCovariance, M::Diagonal) = + copy!(factorized_similar(C, size(M)...), M) +to_factorized_matrix( + C::IsometricKroneckerCovariance, M::Diagonal{<:Number,<:FillArrays.Fill}) = begin + out = factorized_similar(C, size(M)...) + @assert length(out.B) == 1 + out.B .= M.diag.value + out +end + +for FT in [:DenseCovariance, :IsometricKroneckerCovariance, :BlockDiagonalCovariance] @eval to_factorized_matrix(FAC::$FT, M::PSDMatrix) = PSDMatrix(to_factorized_matrix(FAC, M.R)) end diff --git a/src/data_likelihoods/fenrir.jl b/src/data_likelihoods/fenrir.jl index a805f9de9..01e3a3746 100644 --- a/src/data_likelihoods/fenrir.jl +++ b/src/data_likelihoods/fenrir.jl @@ -57,6 +57,7 @@ function fenrir_data_loglik( # Fit the ODE solution / PN posterior to the provided data; this is the actual Fenrir o = length(data.u[1]) R = cov2psdmatrix(observation_noise_cov; d=o) + R = to_factorized_matrix(integ.cache.covariance_factorization, R) LL, _, _ = fit_pnsolution_to_data!(sol, R, data; proj=observation_matrix) return LL @@ -74,17 +75,10 @@ function fit_pnsolution_to_data!( LL = zero(eltype(sol.prob.p)) o = length(data.u[1]) - @unpack x_tmp, C_dxd, C_d, K1, C_Dxd, C_DxD, m_tmp = cache - _cache = ( - x_tmp=x_tmp, - C_DxD=C_DxD, - C_Dxd=view(C_Dxd, :, 1:o), - C_dxd=view(C_dxd, 1:o, 1:o), - C_d=view(C_d, 1:o), - K1=view(K1, :, 1:o), - K2=view(C_Dxd, :, 1:o), - m_tmp=Gaussian(view(m_tmp.μ, 1:o), view(m_tmp.Σ, 1:o, 1:o)), - ) + d = cache.d + @unpack x_tmp, m_tmp = cache + _cache = make_obssized_cache(cache; o) + @unpack K1, C_DxD, C_dxd, C_Dxd, C_d = _cache x_posterior = copy(sol.x_filt) # the object to be filled state2data_projmat = proj * cache.SolProj @@ -136,9 +130,7 @@ function measure_and_update!(x, u, H, R::PSDMatrix, cache) z, S = cache.m_tmp _matmul!(z, H, x.μ) z .-= u - _matmul!(cache.C_Dxd, x.Σ.R, H') - _matmul!(S, cache.C_Dxd', cache.C_Dxd) - S .+= _matmul!(cache.C_dxd, R.R', R.R) + S = PSDMatrix(make_obscov_sqrt(x.Σ.R, H, R.R)) msmnt = Gaussian(z, S) return update!(x, copy!(cache.x_tmp, x), msmnt, H; R=R, cache) diff --git a/src/derivative_utils.jl b/src/derivative_utils.jl index 9b84fb1c8..bbfae4482 100644 --- a/src/derivative_utils.jl +++ b/src/derivative_utils.jl @@ -8,7 +8,15 @@ function calc_H!(H, integ, cache) calc_H_EK0!(H, integ, cache) # @assert integ.u == @view x_pred.μ[1:(q+1):end] OrdinaryDiffEq.calc_J!(ddu, integ, cache, true) - ProbNumDiffEq._matmul!(H, view(ddu, 1:d, :), cache.SolProj, -1.0, 1.0) + _ddu = size(ddu, 2) != d ? view(ddu, 1:d, :) : ddu + _matmul!(H, _ddu, cache.SolProj, -1.0, 1.0) + elseif integ.alg isa DiagonalEK1 + calc_H_EK0!(H, integ, cache) + OrdinaryDiffEq.calc_J!(ddu, integ, cache, true) + ddu_diag = Diagonal(ddu) + _matmul!(H, ddu_diag, cache.SolProj, -1.0, 1.0) + else + error("Unknown algorithm") end return nothing end diff --git a/src/diffusions.jl b/src/diffusions.jl deleted file mode 100644 index 3570a22f7..000000000 --- a/src/diffusions.jl +++ /dev/null @@ -1,213 +0,0 @@ -abstract type AbstractDiffusion end -abstract type AbstractStaticDiffusion <: AbstractDiffusion end -abstract type AbstractDynamicDiffusion <: AbstractDiffusion end -isstatic(diffusion::AbstractStaticDiffusion) = true -isdynamic(diffusion::AbstractStaticDiffusion) = false -isstatic(diffusion::AbstractDynamicDiffusion) = false -isdynamic(diffusion::AbstractDynamicDiffusion) = true - -apply_diffusion(Q::PSDMatrix, diffusion::Diagonal) = X_A_Xt(Q, sqrt.(diffusion)) -apply_diffusion(Q::PSDMatrix, diffusion::Number) = PSDMatrix(Q.R * sqrt.(diffusion)) - -estimate_global_diffusion(diffusion::AbstractDynamicDiffusion, d, q, Eltype) = NaN - -""" - DynamicDiffusion() - -Time-varying, isotropic diffusion, which is quasi-maximum-likelihood-estimated at each step. - -**This is the recommended diffusion when using adaptive step-size selection,** and in -particular also when solving stiff systems. -""" -struct DynamicDiffusion <: AbstractDynamicDiffusion end -initial_diffusion(::DynamicDiffusion, d, q, Eltype) = one(Eltype) -estimate_local_diffusion(::DynamicDiffusion, integ) = local_scalar_diffusion(integ.cache) - -""" - DynamicMVDiffusion() - -Time-varying, diagonal diffusion, which is quasi-maximum-likelihood-estimated at each step. - -**Only works with the [`EK0`](@ref)!** - -A multi-variate version of [`DynamicDiffusion`](@ref), where instead of an isotropic matrix, -a diagonal matrix is estimated. This can be helpful to get more expressive posterior -covariances when using the [`EK0`](@ref), since the individual dimensions can be adjusted -separately. - -# References -* [bosch20capos](@cite) Bosch et al, "Calibrated Adaptive Probabilistic ODE Solvers", AISTATS (2021) -""" -struct DynamicMVDiffusion <: AbstractDynamicDiffusion end -initial_diffusion(::DynamicMVDiffusion, d, q, Eltype) = - kron(Diagonal(ones(Eltype, d)), Diagonal(ones(Eltype, q + 1))) -estimate_local_diffusion(::DynamicMVDiffusion, integ) = - local_diagonal_diffusion(integ.cache) - -""" - FixedDiffusion(; initial_diffusion=1.0, calibrate=true) - -Time-fixed, isotropic diffusion, which is (optionally) quasi-maximum-likelihood-estimated. - -**This is the recommended diffusion when using fixed steps.** - -By default with `calibrate=true`, all covariances are re-scaled at the end of the solve -with the MLE diffusion. Set `calibrate=false` to skip this step, e.g. when setting the -`initial_diffusion` and then estimating the diffusion outside of the solver -(e.g. with [Fenrir.jl](https://github.com/nathanaelbosch/Fenrir.jl)). -""" -Base.@kwdef struct FixedDiffusion{T<:Number} <: AbstractStaticDiffusion - initial_diffusion::T = 1.0 - calibrate::Bool = true -end -initial_diffusion(diffusionmodel::FixedDiffusion, d, q, Eltype) = - diffusionmodel.initial_diffusion * one(Eltype) -estimate_local_diffusion(::FixedDiffusion, integ) = local_scalar_diffusion(integ.cache) -function estimate_global_diffusion(::FixedDiffusion, integ) - @unpack d, measurement, m_tmp, Smat = integ.cache - # sol_diffusions = integ.sol.diffusions - - v, S = measurement.μ, measurement.Σ - e = m_tmp.μ - e .= v - diffusion_t = if S isa IsometricKroneckerProduct - @assert length(S.B) == 1 - dot(v, e) / d / S.B[1] - else - S_chol = cholesky!(S) - ldiv!(S_chol, e) - dot(v, e) / d - end - - if integ.success_iter == 0 - # @assert length(sol_diffusions) == 0 - global_diffusion = diffusion_t - return global_diffusion - else - # @assert length(sol_diffusions) == integ.success_iter - diffusion_prev = integ.cache.global_diffusion - global_diffusion = - diffusion_prev + (diffusion_t - diffusion_prev) / integ.success_iter - # @info "compute diffusion" diffusion_prev global_diffusion - return global_diffusion - end -end - -""" - FixedMVDiffusion(; initial_diffusion=1.0, calibrate=true) - -Time-fixed, diagonal diffusion, which is quasi-maximum-likelihood-estimated at each step. - -**Only works with the [`EK0`](@ref)!** - -A multi-variate version of [`FixedDiffusion`](@ref), where instead of an isotropic matrix, -a diagonal matrix is estimated. This can be helpful to get more expressive posterior -covariances when using the [`EK0`](@ref), since the individual dimensions can be adjusted -separately. - -# References -* [bosch20capos](@cite) Bosch et al, "Calibrated Adaptive Probabilistic ODE Solvers", AISTATS (2021) -""" -Base.@kwdef struct FixedMVDiffusion{T} <: AbstractStaticDiffusion - initial_diffusion::T = 1.0 - calibrate::Bool = true -end -function initial_diffusion(diffusionmodel::FixedMVDiffusion, d, q, Eltype) - initdiff = diffusionmodel.initial_diffusion - @assert initdiff isa Number || length(initdiff) == d - return kron(Diagonal(initdiff .* ones(Eltype, d)), Diagonal(ones(Eltype, q + 1))) -end -estimate_local_diffusion(::FixedMVDiffusion, integ) = local_diagonal_diffusion(integ.cache) -function estimate_global_diffusion(::FixedMVDiffusion, integ) - @unpack d, q, measurement, local_diffusion = integ.cache - - v, S = measurement.μ, measurement.Σ - # @assert diag(S) |> unique |> length == 1 - S_11 = S[1, 1] - - Σ_ii = v .^ 2 ./ S_11 - Σ = Diagonal(Σ_ii) - Σ_out = kron(Σ, I(q + 1)) # -> Different for each dimension; same for each derivative - - if integ.success_iter == 0 - # @assert length(diffusions) == 0 - return Σ_out - else - # @assert length(diffusions) == integ.success_iter - diffusion_prev = integ.cache.global_diffusion - diffusion = - @. diffusion_prev = - diffusion_prev + (Σ_out - diffusion_prev) / integ.success_iter - return diffusion - end -end - -""" - local_scalar_diffusion(integ) - -Compute the local, scalar diffusion estimate. - -Corresponds to -```math -σ² = zᵀ (H Q H^T)⁻¹ z, -``` -where ``z, H, Q`` are taken from the passed integrator. - -For more background information -* [bosch20capos](@cite) Bosch et al, "Calibrated Adaptive Probabilistic ODE Solvers", AISTATS (2021) -""" -function local_scalar_diffusion(cache) - @unpack d, R, H, Qh, measurement, m_tmp, Smat, C_Dxd = cache - z = measurement.μ - e, HQH = m_tmp.μ, m_tmp.Σ - _matmul!(C_Dxd, Qh.R, H') - _matmul!(HQH, C_Dxd', C_Dxd) - e .= z - σ² = if HQH isa IsometricKroneckerProduct - @assert length(HQH.B) == 1 - dot(z, e) / d / HQH.B[1] - else - C = cholesky!(HQH) - ldiv!(C, e) - dot(z, e) / d - end - return σ² -end - -""" - local_diagonal_diffusion(cache) - -Compute the local, scalar diffusion estimate. - -Corresponds to -```math -Σ_{ii} = z_i^2 / (H Q H^T)_{ii}, -``` -where ``z, H, Q`` are taken from the passed integrator. -**This should only be used with the EK0!** - -For more background information -* [bosch20capos](@cite) Bosch et al, "Calibrated Adaptive Probabilistic ODE Solvers", AISTATS (2021) -""" -function local_diagonal_diffusion(cache) - @unpack d, q, H, Qh, measurement, m_tmp, tmp = cache - @unpack local_diffusion = cache - z = measurement.μ - # HQH = H * unfactorize(Qh) * H' - # @assert HQH |> diag |> unique |> length == 1 - # c1 = view(_matmul!(cache.C_Dxd, Qh.R, H'), :, 1) - c1 = mul!(view(cache.C_Dxd, :, 1:1), Qh.R, view(H, 1:1, :)') - Q0_11 = dot(c1, c1) - - Σ_ii = @. m_tmp.μ = z^2 / Q0_11 - Σ = Diagonal(Σ_ii) - - # local_diffusion = kron(Σ, I(q+1)) - # -> Different for each dimension; same for each derivative - for i in 1:d - for j in (i-1)*(q+1)+1:i*(q+1) - local_diffusion[j, j] = Σ[i, i] - end - end - return local_diffusion -end diff --git a/src/diffusions/apply_diffusion.jl b/src/diffusions/apply_diffusion.jl new file mode 100644 index 000000000..9638ba587 --- /dev/null +++ b/src/diffusions/apply_diffusion.jl @@ -0,0 +1,102 @@ +""" + apply_diffusion(Q::PSDMatrix, diffusion::Union{Number, Diagonal}) -> PSDMatrix + +Apply the diffusion to the PSD transition noise covariance `Q`, return the result. +""" +apply_diffusion( + Q::PSDMatrix, + diffusion::Number, +) = PSDMatrix(Q.R * sqrt.(diffusion)) +apply_diffusion( + Q::PSDMatrix, + diffusion::Diagonal{T,<:FillArrays.Fill}, +) where {T} = apply_diffusion(Q, diffusion.diag.value) +apply_diffusion( + Q::PSDMatrix{T,<:BlockDiag}, + diffusion::Diagonal{T,<:Vector}, +) where {T} = PSDMatrix( + BlockDiag([blocks(Q.R)[i] * sqrt.(diffusion.diag[i]) for i in eachindex(blocks(Q.R))])) +apply_diffusion( + Q::PSDMatrix{T,<:Matrix}, + diffusion::Diagonal{T,<:Vector}, +) where {T} = begin + d = size(diffusion, 1) + q = size(Q, 1) ÷ d - 1 + return PSDMatrix(Q.R * sqrt.(Kronecker.kronecker(diffusion, Eye(q + 1)))) +end + +""" + apply_diffusion!(Q::PSDMatrix, diffusion::Union{Number, Diagonal}) -> PSDMatrix + +Apply the diffusion to the PSD transition noise covariance `Q` in place and return the result. +""" +apply_diffusion!( + Q::PSDMatrix, + diffusion::Diagonal{T,<:FillArrays.Fill}, +) where {T} = begin + rmul!(Q.R, sqrt.(diffusion.diag.value)) + return Q +end +apply_diffusion!( + Q::PSDMatrix{T,<:BlockDiag}, + diffusion::Diagonal{T,<:Vector}, +) where {T} = begin + @simd ivdep for i in eachindex(blocks(Q.R)) + rmul!(blocks(Q.R)[i], sqrt(diffusion.diag[i])) + end + return Q +end +apply_diffusion!( + Q::PSDMatrix, + diffusion::Diagonal, +) = begin + # @warn "This is not yet implemented efficiently; TODO" + d = size(diffusion, 1) + D = size(Q, 1) + q = D ÷ d - 1 + # _matmul!(Q.R, Q.R, Kronecker.kronecker(sqrt.(diffusion), Eye(q + 1))) + _matmul!(Q.R, Q.R, kron(sqrt.(diffusion), Eye(q + 1))) + return Q +end + +""" + apply_diffusion!(out::PSDMatrix, Q::PSDMatrix, diffusion::Union{Number, Diagonal}) -> PSDMatrix + +Apply the diffusion to the PSD transition noise covariance `Q` and store the result in `out`. +""" +apply_diffusion!( + out::PSDMatrix, + Q::PSDMatrix, + diffusion::Number, +) = begin + _matmul!(out.R, Q.R, sqrt.(diffusion)) + return out +end +apply_diffusion!( + out::PSDMatrix, + Q::PSDMatrix, + diffusion::Diagonal{<:Number,<:FillArrays.Fill}, +) = apply_diffusion!(out, Q, diffusion.diag.value) +apply_diffusion!( + out::PSDMatrix{T,<:BlockDiag}, + Q::PSDMatrix{T,<:BlockDiag}, + diffusion::Diagonal{<:T,<:Vector}, +) where {T} = begin + @simd ivdep for i in eachindex(blocks(Q.R)) + _matmul!(blocks(out.R)[i], blocks(Q.R)[i], sqrt(diffusion.diag[i])) + end + return out +end +apply_diffusion!( + out::PSDMatrix, + Q::PSDMatrix, + diffusion::Diagonal, +) = begin + # @warn "This is not yet implemented efficiently; TODO" + d = size(diffusion, 1) + D = size(Q, 1) + q = D ÷ d - 1 + # _matmul!(out.R, Q.R, Kronecker.kronecker(sqrt.(diffusion), Eye(q + 1))) + _matmul!(out.R, Q.R, kron(sqrt.(diffusion), Eye(q + 1))) + return out +end diff --git a/src/diffusions/calibration.jl b/src/diffusions/calibration.jl new file mode 100644 index 000000000..6b22b3da3 --- /dev/null +++ b/src/diffusions/calibration.jl @@ -0,0 +1,178 @@ +@doc raw""" + invquad(v, M; v_cache, M_cache) + +Compute ``v' M^{-1} v`` without allocations and with Matrix-specific specializations. + +Needed for MLE diffusion estimation. +""" +invquad +invquad(v, M::Matrix; v_cache, M_cache) = begin + v_cache .= v + M_chol = cholesky!(copy!(M_cache, M)) + ldiv!(M_chol, v_cache) + dot(v, v_cache) +end +invquad(v, M::IsometricKroneckerProduct; v_cache, M_cache=nothing) = begin + v_cache .= v + @assert length(M.B) == 1 + return dot(v, v_cache) / M.B[1] +end +invquad(v, M::BlockDiag; v_cache, M_cache=nothing) = begin + v_cache .= v + @assert length(M.blocks) == length(v) == length(v_cache) + @simd ivdep for i in eachindex(v) + @assert length(M.blocks[i]) == 1 + @inbounds v_cache[i] /= M.blocks[i][1] + end + return dot(v, v_cache) +end + +@doc raw""" + estimate_global_diffusion(::FixedDiffusion, integ) + +Updates the global quasi-MLE diffusion estimate on the current measuremnt. + +The global quasi-MLE diffusion estimate Corresponds to +```math +\hat{σ}^2_N = \frac{1}{Nd} \sum_{i=1}^N z_i^T S_i^{-1} z_i, +``` +where ``z_i, S_i`` are taken the predicted observations from each step. +This function updates the iteratively computed global diffusion estimate by computing +```math +\hat{σ}^2_n = \hat{σ}^2_{n-1} + ((z_n^T S_n^{-1} z_n) / d - \hat{σ}^2_{n-1}) / n. +``` + +For more background information +* [bosch20capos](@cite) Bosch et al, "Calibrated Adaptive Probabilistic ODE Solvers", AISTATS (2021) +""" +function estimate_global_diffusion(::FixedDiffusion, integ) + @unpack d, measurement, m_tmp, Smat = integ.cache + v, S = measurement.μ, measurement.Σ + _v, _S = m_tmp.μ, m_tmp.Σ + + diffusion_increment = invquad(v, S; v_cache=_v, M_cache=_S) / d + + new_mle_diffusion = if integ.success_iter == 0 + diffusion_increment + else + current_mle_diffusion = integ.cache.global_diffusion.diag.value + current_mle_diffusion + + (diffusion_increment - current_mle_diffusion) / integ.success_iter + end + + integ.cache.global_diffusion = new_mle_diffusion * Eye(d) + return integ.cache.global_diffusion +end + +@doc raw""" + estimate_global_diffusion(::FixedMVDiffusion, integ) + +Updates the multivariate global quasi-MLE diffusion estimate on the current measuremnt. + +**This only works with the EK0!** + +The global quasi-MLE diffusion estimate Corresponds to +```math +[\hat{Σ}^2_N]_{jj} = \frac{1}{N} \sum_{i=1}^N [z_i]_j^2 / [S_i]_{11}, +``` +where ``z_i, S_i`` are taken the predicted observations from each step. +This function updates the iteratively computed global diffusion estimate by computing +```math +[\hat{Σ}^2_n]_{jj} = [\hat{Σ}^2_{n-1}]_{jj} + ([z_n]_j^2 / [S_n]_{11}, - [\hat{Σ}^2_{n-1}]_{jj}) / n. +``` + +For more background information +* [bosch20capos](@cite) Bosch et al, "Calibrated Adaptive Probabilistic ODE Solvers", AISTATS (2021) +""" +function estimate_global_diffusion(::FixedMVDiffusion, integ) + @unpack d, q, measurement, local_diffusion, C_d = integ.cache + v, S = measurement.μ, measurement.Σ + # @assert diag(S) |> unique |> length == 1 + diffusion_increment = let + @.. C_d = v^2 / S[1, 1] + Diagonal(C_d) + end + + new_mle_diffusion = if integ.success_iter == 0 + diffusion_increment + else + current_mle_diffusion = integ.cache.global_diffusion + @.. current_mle_diffusion + + (diffusion_increment - current_mle_diffusion) / integ.success_iter + end + + copy!(integ.cache.global_diffusion, new_mle_diffusion) + return integ.cache.global_diffusion +end + +@doc raw""" + local_scalar_diffusion(integ) + +Compute the local scalar quasi-MLE diffusion estimate. + +Corresponds to +```math +σ² = zᵀ (H Q H^T)⁻¹ z, +``` +where ``z, H, Q`` are taken from the passed integrator. + +For more background information +* [bosch20capos](@cite) Bosch et al, "Calibrated Adaptive Probabilistic ODE Solvers", AISTATS (2021) +""" +function local_scalar_diffusion(cache) + @unpack d, R, H, Qh, measurement, m_tmp, Smat, C_Dxd, C_d, C_dxd = cache + z = measurement.μ + HQH = let + _matmul!(C_Dxd, Qh.R, H') + _matmul!(C_dxd, C_Dxd', C_Dxd) + end + σ² = invquad(z, HQH; v_cache=C_d, M_cache=C_dxd) / d + cache.local_diffusion = σ² * Eye(d) + return cache.local_diffusion +end + +@doc raw""" + local_diagonal_diffusion(cache) + +Compute the local diagonal quasi-MLE diffusion estimate. + +**This only works with the EK0!** + +Corresponds to +```math +Σ_{ii} = z_i^2 / (H Q H^T)_{ii}, +``` +where ``z, H, Q`` are taken from the passed integrator. + +For more background information +* [bosch20capos](@cite) Bosch et al, "Calibrated Adaptive Probabilistic ODE Solvers", AISTATS (2021) +""" +function local_diagonal_diffusion(cache) + @unpack d, q, H, Qh, measurement, m_tmp, tmp = cache + @unpack local_diffusion = cache + @assert H == cache.E1 + + z = measurement.μ + # HQH = H * unfactorize(Qh) * H' + # @assert HQH |> diag |> unique |> length == 1 + # c1 = view(_matmul!(cache.C_Dxd, Qh.R, H'), :, 1) + # Q_11 = dot(c1, c1) + + Q_11 = if Qh.R isa BlockDiag + for i in 1:d + c1 = _matmul!( + view(cache.C_Dxd.blocks[i], :, 1:1), + Qh.R.blocks[i], + view(H.blocks[i], 1:1, :)', + ) + tmp[i] = dot(c1, c1) + end + tmp + else + @warn "This is not yet implemented efficiently; TODO" + diag(X_A_Xt(Qh, H)) + end + + @. local_diffusion.diag = z^2 / Q_11 + return local_diffusion +end diff --git a/src/diffusions/typedefs.jl b/src/diffusions/typedefs.jl new file mode 100644 index 000000000..1ebfb1954 --- /dev/null +++ b/src/diffusions/typedefs.jl @@ -0,0 +1,103 @@ +abstract type AbstractDiffusion end +abstract type AbstractStaticDiffusion <: AbstractDiffusion end +abstract type AbstractDynamicDiffusion <: AbstractDiffusion end +isstatic(diffusion::AbstractStaticDiffusion) = true +isdynamic(diffusion::AbstractStaticDiffusion) = false +isstatic(diffusion::AbstractDynamicDiffusion) = false +isdynamic(diffusion::AbstractDynamicDiffusion) = true + +estimate_global_diffusion(diffusion::AbstractDynamicDiffusion, d, q, Eltype) = + error("Not possible or not implemented") + +""" + DynamicDiffusion() + +Time-varying, isotropic diffusion, which is quasi-maximum-likelihood-estimated at each step. + +**This is the recommended diffusion when using adaptive step-size selection,** and in +particular also when solving stiff systems. +""" +struct DynamicDiffusion <: AbstractDynamicDiffusion end +initial_diffusion(::DynamicDiffusion, d, q, Eltype) = one(Eltype) * Eye(d) +estimate_local_diffusion(::DynamicDiffusion, integ) = local_scalar_diffusion(integ.cache) + +""" + DynamicMVDiffusion() + +Time-varying, diagonal diffusion, which is quasi-maximum-likelihood-estimated at each step. + +**Only works with the [`EK0`](@ref)!** + +A multi-variate version of [`DynamicDiffusion`](@ref), where instead of an isotropic matrix, +a diagonal matrix is estimated. This can be helpful to get more expressive posterior +covariances when using the [`EK0`](@ref), since the individual dimensions can be adjusted +separately. + +# References +* [bosch20capos](@cite) Bosch et al, "Calibrated Adaptive Probabilistic ODE Solvers", AISTATS (2021) +""" +struct DynamicMVDiffusion <: AbstractDynamicDiffusion end +initial_diffusion(::DynamicMVDiffusion, d, q, Eltype) = Diagonal(ones(Eltype, d)) +estimate_local_diffusion(::DynamicMVDiffusion, integ) = + local_diagonal_diffusion(integ.cache) + +""" + FixedDiffusion(; initial_diffusion=1.0, calibrate=true) + +Time-fixed, isotropic diffusion, which is (optionally) quasi-maximum-likelihood-estimated. + +**This is the recommended diffusion when using fixed steps.** + +By default with `calibrate=true`, all covariances are re-scaled at the end of the solve +with the MLE diffusion. Set `calibrate=false` to skip this step, e.g. when setting the +`initial_diffusion` and then estimating the diffusion outside of the solver +(e.g. with [Fenrir.jl](https://github.com/nathanaelbosch/Fenrir.jl)). +""" +Base.@kwdef struct FixedDiffusion{T<:Number} <: AbstractStaticDiffusion + initial_diffusion::T = 1.0 + calibrate::Bool = true +end +initial_diffusion(diffusionmodel::FixedDiffusion, d, q, Eltype) = + diffusionmodel.initial_diffusion * one(Eltype) * Eye(d) +estimate_local_diffusion(::FixedDiffusion, integ) = local_scalar_diffusion(integ.cache) + +""" + FixedMVDiffusion(; initial_diffusion=1.0, calibrate=true) + +Time-fixed, diagonal diffusion, which is quasi-maximum-likelihood-estimated at each step. + +**Only works with the [`EK0`](@ref)!** + +A multi-variate version of [`FixedDiffusion`](@ref), where instead of an isotropic matrix, +a diagonal matrix is estimated. This can be helpful to get more expressive posterior +covariances when using the [`EK0`](@ref), since the individual dimensions can be adjusted +separately. + +# References +* [bosch20capos](@cite) Bosch et al, "Calibrated Adaptive Probabilistic ODE Solvers", AISTATS (2021) +""" +Base.@kwdef struct FixedMVDiffusion{T} <: AbstractStaticDiffusion + initial_diffusion::T = 1.0 + calibrate::Bool = true +end +function initial_diffusion(diffusionmodel::FixedMVDiffusion, d, q, Eltype) + initdiff = diffusionmodel.initial_diffusion + if initdiff isa Number + return initdiff * one(Eltype) * I(d) + elseif initdiff isa AbstractVector + @assert length(initdiff) == d + return Diagonal(initdiff) + elseif initdiff isa Diagonal + @assert size(initdiff) == (d, d) + return initdiff + else + throw( + ArgumentError( + "Invalid `initial_diffusion`. The `FixedMVDiffusion` assumes a dxd diagonal diffusion model. So, pass either a Number, a Vector of length d, or a `Diagonal`.", + ), + ) + end +end +estimate_local_diffusion(::FixedMVDiffusion, integ) = + integ.alg isa EK0 ? local_diagonal_diffusion(integ.cache) : + local_scalar_diffusion(integ.cache) diff --git a/src/fast_linalg.jl b/src/fast_linalg.jl index b81ad9924..cb3a549e1 100644 --- a/src/fast_linalg.jl +++ b/src/fast_linalg.jl @@ -13,46 +13,34 @@ _matmul!(C, A, B) _matmul!(C, A, B) = mul!(C, A, B) _matmul!(C, A, B, a, b) = mul!(C, A, B, a, b) # Use Octavian.jl's matrix-matrix products whenever applicable +const MSR{T} = Union{Matrix{T},SubArray{T},Base.ReshapedArray{T},Adjoint{T,<:Matrix}} _matmul!( - C::AbstractMatrix{T}, - A::AbstractMatrix{T}, - B::AbstractMatrix{T}, + C::MSR{T}, + A::MSR{T}, + B::MSR{T}, alpha::Number, beta::Number, ) where {T<:LinearAlgebra.BlasFloat} = matmul!(C, A, B, alpha, beta) _matmul!( - C::AbstractMatrix{T}, - A::AbstractMatrix{T}, - B::AbstractMatrix{T}, + C::MSR{T}, + A::MSR{T}, + B::MSR{T}, ) where {T<:LinearAlgebra.BlasFloat} = matmul!(C, A, B) # Some exceptions where we'd rather broadcast with FastBroadcast.jl: # Matrix-scalar products _matmul!(C::AbstractVecOrMat, A::AbstractVecOrMat, b::Number) = @.. C = A * b _matmul!(C::AbstractVecOrMat, a::Number, B::AbstractVecOrMat) = @.. C = a * B # Matrix matrix products with diagonal matrices -_matmul!(C::AbstractMatrix, A::AbstractMatrix, B::Diagonal) = (@.. C = A * B.diag') -_matmul!(C::AbstractMatrix, A::Diagonal, B::AbstractMatrix) = (@.. C = A.diag * B) -_matmul!(C::AbstractMatrix, A::Diagonal, B::Diagonal) = @.. C = A * B -_matmul!( - C::AbstractMatrix{T}, - A::AbstractMatrix{T}, - B::Diagonal{T}, -) where {T<:LinearAlgebra.BlasFloat} = (@.. C = A * B.diag') -_matmul!( - C::AbstractMatrix{T}, - A::Diagonal{T}, - B::AbstractMatrix{T}, -) where {T<:LinearAlgebra.BlasFloat} = (@.. C = A.diag * B) -_matmul!( - C::AbstractMatrix{T}, - A::Diagonal{T}, - B::Diagonal{T}, -) where {T<:LinearAlgebra.BlasFloat} = @.. C = A * B -_matmul!( - C::AbstractMatrix{T}, - A::LowerTriangular{T}, - B::UpperTriangular{T}, -) where {T<:LinearAlgebra.BlasFloat} = mul!(C, A, B) +_matmul!(C::MSR, A::MSR, B::Diagonal) = + @.. C = A * B.diag' +_matmul!(C::MSR, A::Diagonal, B::MSR) = + (@.. C = A.diag * B) +_matmul!(C::MSR, A::Diagonal, B::Diagonal) = + (@.. C = A * B) +_matmul!(C::MSR, A::MSR, B::Diagonal, alpha::Number, beta::Number) = + @.. C = A * B.diag' * alpha + C * beta +_matmul!(C::MSR, A::Diagonal, B::MSR, alpha::Number, beta::Number) = + (@.. C = A.diag * B * alpha + C * beta) """ getupperright!(A) diff --git a/src/filtering/markov_kernel.jl b/src/filtering/markov_kernel.jl index e980108dd..07873ede1 100644 --- a/src/filtering/markov_kernel.jl +++ b/src/filtering/markov_kernel.jl @@ -120,6 +120,28 @@ function marginalize_cov!( return marginalize_cov!(_Σ_out, _Σ_curr, _K; C_DxD=_C_DxD, C_3DxD=_C_3DxD) end +function marginalize_cov!( + Σ_out::PSDMatrix{T,<:BlockDiag}, + Σ_curr::PSDMatrix{T,<:BlockDiag}, + K::AffineNormalKernel{ + <:AbstractMatrix, + <:Any, + <:PSDMatrix{S,<:BlockDiag}, + }; + C_DxD::AbstractMatrix, + C_3DxD::AbstractMatrix, +) where {T,S} + @simd ivdep for i in eachindex(blocks(Σ_out.R)) + _Σ_out = PSDMatrix(Σ_out.R.blocks[i]) + _Σ_curr = PSDMatrix(Σ_curr.R.blocks[i]) + _K = AffineNormalKernel(K.A.blocks[i], K.b, PSDMatrix(K.C.R.blocks[i])) + _C_DxD = C_DxD.blocks[i] + _C_3DxD = C_3DxD.blocks[i] + marginalize_cov!(_Σ_out, _Σ_curr, _K; C_DxD=_C_DxD, C_3DxD=_C_3DxD) + end + return Σ_out +end + """ compute_backward_kernel!(Kout, xpred, x, K; C_DxD[, diffusion=1]) @@ -200,11 +222,11 @@ function compute_backward_kernel!( end _matmul!(view(Λ.R, 1:D, 1:D), x.Σ.R, C_DxD) # Λ.R[D+1:2D, 1:D] = (G * Q.R')' - if !isone(diffusion) - _matmul!(C_DxD, Q.R, sqrt.(diffusion)) - _matmul!(view(Λ.R, D+1:2D, 1:D), C_DxD, G') - else + if isone(diffusion) _matmul!(view(Λ.R, D+1:2D, 1:D), Q.R, G') + else + apply_diffusion!(PSDMatrix(C_DxD), Q, diffusion) + _matmul!(view(Λ.R, D+1:2D, 1:D), C_DxD, G') end return Kout @@ -216,7 +238,7 @@ function compute_backward_kernel!( x::SRGaussian{T,<:IsometricKroneckerProduct}, K::KT2; C_DxD::AbstractMatrix, - diffusion=1, + diffusion::Union{Number,Diagonal}=1, ) where { T, KT1<:AffineNormalKernel{ @@ -239,7 +261,61 @@ function compute_backward_kernel!( _x = Gaussian(reshape_no_alloc(x.μ, Q, d), PSDMatrix(x.Σ.R.B)) _K = AffineNormalKernel(K.A.B, reshape_no_alloc(K.b, Q, d), PSDMatrix(K.C.R.B)) _C_DxD = C_DxD.B + _diffusion = + diffusion isa Number ? diffusion : + diffusion isa IsometricKroneckerProduct ? diffusion.B : diffusion return compute_backward_kernel!( - _Kout, _x_pred, _x, _K; C_DxD=_C_DxD, diffusion=diffusion) + _Kout, _x_pred, _x, _K; C_DxD=_C_DxD, diffusion=_diffusion) +end + +function compute_backward_kernel!( + Kout::KT1, + xpred::SRGaussian{T,<:BlockDiag}, + x::SRGaussian{T,<:BlockDiag}, + K::KT2; + C_DxD::AbstractMatrix, + diffusion::Union{Number,Diagonal}=1, +) where { + T, + KT1<:AffineNormalKernel{ + <:BlockDiag, + <:AbstractVector, + <:PSDMatrix{T,<:BlockDiag}, + }, + KT2<:AffineNormalKernel{ + <:BlockDiag, + <:Any, + <:PSDMatrix{T,<:BlockDiag}, + }, +} + d = length(blocks(xpred.Σ.R)) + q = size(blocks(xpred.Σ.R)[1], 1) - 1 + + @simd ivdep for i in eachindex(blocks(xpred.Σ.R)) + _Kout = AffineNormalKernel( + Kout.A.blocks[i], + view(Kout.b, (i-1)*(q+1)+1:i*(q+1)), + PSDMatrix(Kout.C.R.blocks[i]), + ) + _xpred = Gaussian( + view(xpred.μ, (i-1)*(q+1)+1:i*(q+1)), + PSDMatrix(xpred.Σ.R.blocks[i]), + ) + _x = Gaussian( + view(x.μ, (i-1)*(q+1)+1:i*(q+1)), + PSDMatrix(x.Σ.R.blocks[i]), + ) + _K = AffineNormalKernel( + K.A.blocks[i], + ismissing(K.b) ? missing : view(K.b, (i-1)*(q+1)+1:i*(q+1)), + PSDMatrix(K.C.R.blocks[i]), + ) + _C_DxD = C_DxD.blocks[i] + _diffusion = diffusion isa Number ? diffusion : diffusion.diag[i] + compute_backward_kernel!( + _Kout, _xpred, _x, _K, C_DxD=_C_DxD, diffusion=_diffusion, + ) + end + return Kout end diff --git a/src/filtering/predict.jl b/src/filtering/predict.jl index 8738d74fa..b37fc5bd3 100644 --- a/src/filtering/predict.jl +++ b/src/filtering/predict.jl @@ -66,7 +66,7 @@ function predict_cov!( Qh::PSDMatrix, C_DxD::AbstractMatrix, C_2DxD::AbstractMatrix, - diffusion=1, + diffusion::Union{Number,Diagonal}, ) if iszero(diffusion) fast_X_A_Xt!(Σ_out, Σ_curr, Ah) @@ -76,10 +76,10 @@ function predict_cov!( D = size(Qh, 1) _matmul!(view(R, 1:D, 1:D), Σ_curr.R, Ah') - if !isone(diffusion) - _matmul!(view(R, D+1:2D, 1:D), Qh.R, sqrt.(diffusion)) - else + if isone(diffusion) @.. R[D+1:2D, 1:D] = Qh.R + else + apply_diffusion!(PSDMatrix(view(R, D+1:2D, 1:D)), Qh, diffusion) end _matmul!(M, R', R) chol = cholesky!(Symmetric(M), check=false) @@ -101,7 +101,7 @@ function predict_cov!( Qh::PSDMatrix{S,<:IsometricKroneckerProduct}, C_DxD::IsometricKroneckerProduct, C_2DxD::IsometricKroneckerProduct, - diffusion=1, + diffusion::Union{Number,Diagonal}, ) where {T,S} _Σ_out = PSDMatrix(Σ_out.R.B) _Σ_curr = PSDMatrix(Σ_curr.R.B) @@ -109,7 +109,33 @@ function predict_cov!( _Qh = PSDMatrix(Qh.R.B) _C_DxD = C_DxD.B _C_2DxD = C_2DxD.B - _diffusion = diffusion isa IsometricKroneckerProduct ? diffusion.B : diffusion + _diffusion = + diffusion isa Number ? diffusion : + diffusion isa IsometricKroneckerProduct ? diffusion.B : diffusion return predict_cov!(_Σ_out, _Σ_curr, _Ah, _Qh, _C_DxD, _C_2DxD, _diffusion) end + +# BlockDiagonal version +function predict_cov!( + Σ_out::PSDMatrix{T,<:BlockDiag}, + Σ_curr::PSDMatrix{T,<:BlockDiag}, + Ah::BlockDiag, + Qh::PSDMatrix{S,<:BlockDiag}, + C_DxD::BlockDiag, + C_2DxD::BlockDiag, + diffusion::Union{Number,Diagonal}, +) where {T,S} + @simd ivdep for i in eachindex(blocks(Σ_out.R)) + predict_cov!( + PSDMatrix(Σ_out.R.blocks[i]), + PSDMatrix(Σ_curr.R.blocks[i]), + Ah.blocks[i], + PSDMatrix(Qh.R.blocks[i]), + C_DxD.blocks[i], + C_2DxD.blocks[i], + diffusion isa Number ? diffusion : diffusion.diag[i], + ) + end + return Σ_out +end diff --git a/src/filtering/update.jl b/src/filtering/update.jl index 3d9125fcd..4e4468480 100644 --- a/src/filtering/update.jl +++ b/src/filtering/update.jl @@ -193,6 +193,50 @@ function update!( return x_out, loglikelihood end +function update!( + x_out::SRGaussian{T,<:BlockDiag}, + x_pred::SRGaussian{T,<:BlockDiag}, + measurement::Gaussian{ + <:AbstractVector, + <:Union{<:PSDMatrix{T,<:BlockDiag},<:BlockDiag}, + }, + H::BlockDiag, + K1_cache::BlockDiag, + K2_cache::BlockDiag, + M_cache::BlockDiag, + C_dxd::BlockDiag, + C_d::AbstractVector; + R::Union{Nothing,PSDMatrix{T,<:BlockDiag}}=nothing, +) where {T} + d = length(blocks(x_out.Σ.R)) + q = size(blocks(x_out.Σ.R)[1], 1) - 1 + + ll = zero(eltype(x_out.μ)) + for i in eachindex(blocks(x_out.Σ.R)) + _, _ll = update!( + Gaussian(view(x_out.μ, (i-1)*(q+1)+1:i*(q+1)), + PSDMatrix(x_out.Σ.R.blocks[i])), + Gaussian(view(x_pred.μ, (i-1)*(q+1)+1:i*(q+1)), + PSDMatrix(x_pred.Σ.R.blocks[i])), + Gaussian(view(measurement.μ, i:i), + if measurement.Σ isa PSDMatrix + PSDMatrix(measurement.Σ.R.blocks[i]) + else + measurement.Σ.blocks[i] + end), + H.blocks[i], + K1_cache.blocks[i], + K2_cache.blocks[i], + M_cache.blocks[i], + C_dxd.blocks[i], + view(C_d, i:i); + R=isnothing(R) ? nothing : PSDMatrix(blocks(R.R)[i]), + ) + ll += _ll + end + return x_out, ll +end + # Short-hand with cache function update!(x_out, x, measurement, H; cache, R=nothing) @unpack K1, m_tmp, C_DxD, C_dxd, C_Dxd, C_d = cache diff --git a/src/initialization/classicsolverinit.jl b/src/initialization/classicsolverinit.jl index e7bd1547f..123ddadee 100644 --- a/src/initialization/classicsolverinit.jl +++ b/src/initialization/classicsolverinit.jl @@ -90,7 +90,7 @@ end function rk_init_improve(cache::AbstractODEFilterCache, ts, us, dt) @unpack A, Q = cache # @unpack Ah, Qh = cache - @unpack x, x_pred, x_filt, measurement = cache + @unpack x, x_pred, x_filt, measurement, x_tmp = cache @unpack K1, C_Dxd, C_DxD, C_dxd, C_3DxD, C_d = cache @unpack backward_kernel = cache @@ -98,7 +98,7 @@ function rk_init_improve(cache::AbstractODEFilterCache, ts, us, dt) make_preconditioners!(cache, dt) @unpack P, PI = cache - _gaussian_mul!(x, P, x) + _gaussian_mul!(x, P, copy!(x_tmp, x)) preds = [] filts = [copy(x)] diff --git a/src/integrator_utils.jl b/src/integrator_utils.jl index e13c9e4b8..b27e0b60c 100644 --- a/src/integrator_utils.jl +++ b/src/integrator_utils.jl @@ -47,13 +47,12 @@ function calibrate_solution!(integ, mle_diffusion) set_diffusions!(integ.sol, mle_diffusion * integ.cache.default_diffusion) # Rescale all filtering estimates to have the correct diffusion - @assert mle_diffusion isa Number || mle_diffusion isa Diagonal - sqrt_diff = mle_diffusion isa Number ? sqrt(mle_diffusion) : sqrt.(mle_diffusion) + @assert mle_diffusion isa Diagonal @simd ivdep for C in integ.sol.x_filt.Σ - rmul!(C.R, sqrt_diff) + apply_diffusion!(C, mle_diffusion) end @simd ivdep for C in integ.sol.backward_kernels.C - rmul!(C.R, sqrt_diff) + apply_diffusion!(C, mle_diffusion) end # Re-write into the solution estimates @@ -70,13 +69,17 @@ Set the contents of `solution.diffusions` to the provided `diffusion`, overwriti diffusion estimates that are in there. Typically, `diffusion` is either a global quasi-MLE or the specified initial diffusion value if no calibration is desired. """ -function set_diffusions!(solution::AbstractProbODESolution, diffusion::Number) - solution.diffusions .= diffusion - return nothing -end -function set_diffusions!(solution::AbstractProbODESolution, diffusion::Diagonal) - @simd ivdep for d in solution.diffusions - copy!(d, diffusion) +function set_diffusions!(solution::AbstractProbODESolution, diffusion) + if diffusion isa Diagonal{<:Number,<:FillArrays.Fill} + @simd ivdep for i in eachindex(solution.diffusions) + solution.diffusions[i] = copy(diffusion) + end + elseif diffusion isa Diagonal{<:Number,<:Vector} + @simd ivdep for d in solution.diffusions + copy!(d, diffusion) + end + else + throw(ArgumentError("unexpected diffusion type $(typeof(diffusion))")) end return nothing end diff --git a/src/kronecker.jl b/src/kronecker.jl index 7c6d71a48..f18bffd74 100644 --- a/src/kronecker.jl +++ b/src/kronecker.jl @@ -62,6 +62,7 @@ end Base.:*(K::IKP, a::Number) = IsometricKroneckerProduct(K.ldim, K.B * a) Base.:*(a::Number, K::IKP) = IsometricKroneckerProduct(K.ldim, a * K.B) LinearAlgebra.adjoint(A::IKP) = IsometricKroneckerProduct(A.ldim, A.B') +LinearAlgebra.rmul!(A::IKP, b::Number) = IsometricKroneckerProduct(A.ldim, rmul!(A.B, b)) function check_same_size(A::IKP, B::IKP) if A.ldim != B.ldim || size(A.B) != size(B.B) @@ -102,6 +103,13 @@ Base.:+(A::IKP, B::IKP) = begin end Base.:+(U::UniformScaling, K::IKP) = IsometricKroneckerProduct(K.ldim, U + K.B) Base.:+(K::IKP, U::UniformScaling) = IsometricKroneckerProduct(K.ldim, U + K.B) + +add!(out::IsometricKroneckerProduct, toadd::IsometricKroneckerProduct) = begin + @assert out.ldim == toadd.ldim + add!(out.B, toadd.B) + return out +end + Base.:-(U::UniformScaling, K::IKP) = IsometricKroneckerProduct(K.ldim, U - K.B) LinearAlgebra.inv(K::IKP) = IsometricKroneckerProduct(K.ldim, inv(K.B)) Base.:/(A::IKP, B::IKP) = begin @@ -152,6 +160,27 @@ _matmul!( return A end +mul!(A::IKP, b::Number, C::IKP) = begin + check_matmul_sizes(A, C) + mul!(A.B, b, C.B) + return A +end +mul!(A::IKP, B::IKP, c::Number) = begin + check_matmul_sizes(A, B) + mul!(A.B, B.B, c) + return A +end +_matmul!(A::IKP, b::Number, C::IKP) = begin + check_matmul_sizes(A, C) + _matmul!(A.B, b, C.B) + return A +end +_matmul!(A::IKP, B::IKP, c::Number) = begin + check_matmul_sizes(A, B) + _matmul!(A.B, B.B, c) + return A +end + """ Allocation-free reshape Found here: https://discourse.julialang.org/t/convert-array-into-matrix-in-place/55624/5 diff --git a/src/perform_step.jl b/src/perform_step.jl index 29574b68a..d1c834284 100644 --- a/src/perform_step.jl +++ b/src/perform_step.jl @@ -124,7 +124,7 @@ function OrdinaryDiffEq.perform_step!(integ, cache::EKCache, repeat_step=false) # Update the global diffusion MLE (if applicable) if !isdynamic(cache.diffusionmodel) - cache.global_diffusion = estimate_global_diffusion(cache.diffusionmodel, integ) + estimate_global_diffusion(cache.diffusionmodel, integ) end # Advance the state @@ -163,7 +163,7 @@ compute_measurement_covariance!(cache) = begin _matmul!(cache.C_Dxd, cache.x_pred.Σ.R, cache.H') _matmul!(cache.measurement.Σ, cache.C_Dxd', cache.C_Dxd) if !isnothing(cache.R) - cache.measurement.Σ .+= _matmul!(cache.C_dxd, cache.R.R', cache.R.R) + add!(cache.measurement.Σ, _matmul!(cache.C_dxd, cache.R.R', cache.R.R)) end end @@ -218,35 +218,22 @@ To save allocations, the function modifies the given `cache` and writes into `cache.C_Dxd` during some computations. """ function estimate_errors!(cache::AbstractODEFilterCache) - @unpack local_diffusion, Qh, H, d = cache - - R = cache.C_Dxd - - if local_diffusion isa Diagonal - _QR = cache.C_DxD .= Qh.R .* sqrt.(local_diffusion.diag)' - _matmul!(R, _QR, H') - error_estimate = view(cache.tmp, 1:d) - sum!(abs2, error_estimate', view(R, :, 1:d)) - error_estimate .= sqrt.(error_estimate) - return error_estimate - elseif local_diffusion isa Number - _matmul!(R, Qh.R, H') - - # error_estimate = diag(PSDMatrix(R)) - # error_estimate .*= local_diffusion - # error_estimate .= sqrt.(error_estimate) - # error_estimate = view(error_estimate, 1:d) - - # faster: - error_estimate = view(cache.tmp, 1:d) - if R isa IsometricKroneckerProduct - error_estimate .= sum(abs2, R.B) - else - sum!(abs2, error_estimate', view(R, :, 1:d)) - end - error_estimate .*= local_diffusion - error_estimate .= sqrt.(error_estimate) + @unpack local_diffusion, Qh, H, C_d, C_Dxd, C_DxD = cache + _Q = apply_diffusion!(PSDMatrix(C_DxD), Qh, local_diffusion) + _HQH = PSDMatrix(_matmul!(C_Dxd, _Q.R, H')) + error_estimate = diag!(C_d, _HQH) + @.. error_estimate = sqrt(error_estimate) + return error_estimate +end - return error_estimate +diag!(v::AbstractVector, M::PSDMatrix) = (sum!(abs2, v', M.R); v) +diag!(v::AbstractVector, M::PSDMatrix{<:Number,<:IsometricKroneckerProduct}) = + v .= sum(abs2, M.R.B) +diag!(v::AbstractVector, M::PSDMatrix{<:Number,<:BlockDiag}) = begin + @assert length(v) == nblocks(M.R) + @assert size(blocks(M.R)[1], 2) == 1 # assumes all of them have the same shape + @simd ivdep for i in eachindex(blocks(M.R)) + v[i] = sum(abs2, blocks(M.R)[i]) end + return v end diff --git a/src/preconditioning.jl b/src/preconditioning.jl index 8dfd9fac8..ad0e494a4 100644 --- a/src/preconditioning.jl +++ b/src/preconditioning.jl @@ -8,6 +8,13 @@ function init_preconditioner(C::DenseCovariance{elType}) where {elType} PI = kron(I(C.d), Diagonal(ones(elType, C.q + 1))) return P, PI end +function init_preconditioner(C::BlockDiagonalCovariance{elType}) where {elType} + B = Diagonal(ones(elType, C.q + 1)) + P = BlockDiag([B for _ in 1:C.d]) + BI = Diagonal(ones(elType, C.q + 1)) + PI = BlockDiag([BI for _ in 1:C.d]) + return P, PI +end function make_preconditioners!(cache, dt) @unpack P, PI, d, q = cache @@ -31,15 +38,10 @@ end end return P end - -@fastmath @inbounds function make_preconditioner!(P::IsometricKroneckerProduct, h, d, q) - val = factorial(q) / h^(q + 1 / 2) - @simd ivdep for j in 0:q - P.B.diag[j+1] = val - val /= (q - j) / h - end - return P -end +make_preconditioner!(P::IsometricKroneckerProduct, h, d, q) = + (make_preconditioner!(P.B, h, 1, q); P) +make_preconditioner!(P::BlockDiag, h, d, q) = + (make_preconditioner!(blocks(P)[1], h, 1, q); P) @fastmath @inbounds function make_preconditioner_inv!(PI::Diagonal, h, d, q) val = h^(q + 1 / 2) / factorial(q) @@ -52,13 +54,7 @@ end end return PI end - -@fastmath @inbounds function make_preconditioner_inv!( - PI::IsometricKroneckerProduct, h, d, q) - val = h^(q + 1 / 2) / factorial(q) - @simd ivdep for j in 0:q - PI.B.diag[j+1] = val - val *= (q - j) / h - end - return PI -end +make_preconditioner_inv!(PI::IsometricKroneckerProduct, h, d, q) = + (make_preconditioner_inv!(PI.B, h, 1, q); PI) +make_preconditioner_inv!(PI::BlockDiag, h, d, q) = + (make_preconditioner_inv!(blocks(PI)[1], h, 1, q); PI) diff --git a/src/priors/iwp.jl b/src/priors/iwp.jl index 3a7d7f311..43bc2397e 100644 --- a/src/priors/iwp.jl +++ b/src/priors/iwp.jl @@ -158,8 +158,8 @@ end function initialize_transition_matrices(FAC::IsometricKroneckerCovariance, p::IWP, dt) A, Q = preconditioned_discretize(p) P, PI = initialize_preconditioner(FAC, p, dt) - Ah = PI * A * P - Qh = PSDMatrix(Q.R * PI) + Ah = copy(A) + Qh = copy(Q) return A, Q, Ah, Qh, P, PI end function initialize_transition_matrices(FAC::DenseCovariance, p::IWP, dt) @@ -169,6 +169,15 @@ function initialize_transition_matrices(FAC::DenseCovariance, p::IWP, dt) Ah, Qh = copy(A), copy(Q) return A, Q, Ah, Qh, P, PI end +function initialize_transition_matrices(FAC::BlockDiagonalCovariance, p::IWP, dt) + A, Q = preconditioned_discretize(p) + A = to_factorized_matrix(FAC, A) + Q = to_factorized_matrix(FAC, Q) + P, PI = initialize_preconditioner(FAC, p, dt) + Ah = copy(A) + Qh = copy(Q) + return A, Q, Ah, Qh, P, PI +end function make_transition_matrices!(cache, prior::IWP, dt) @unpack A, Q, Ah, Qh, P, PI = cache diff --git a/src/priors/ltisde.jl b/src/priors/ltisde.jl index 6f7f14d57..059788ac0 100644 --- a/src/priors/ltisde.jl +++ b/src/priors/ltisde.jl @@ -60,44 +60,8 @@ function matrix_fraction_decomposition( ) d = size(drift, 1) M = [drift dispersion*dispersion'; zero(drift) -drift'] - Mexp = exponential!(dt * M) + Mexp = exp(dt * M) A = Mexp[1:d, 1:d] Q = Mexp[1:d, d+1:end] * A' return A, Q end - -# Previous implementation, outdated thanks to FiniteHorizonGramians.jl: -function _discretize_sqrt_with_quadraturetrick(sde::LTISDE, dt::Real) - F, L = drift(sde), dispersion(sde) - - D = size(F, 1) - d = size(L, 2) - N = D # more robust than Int(D / d) - R = similar(F, N * d, D) - method = ExpMethodHigham2005() - expcache = ExponentialUtilities.alloc_mem(F, method) - - Ah = exponential!(dt * F, method, expcache) - - chol_integrand(τ) = begin - E = exponential!((dt - τ) * F', method, expcache) - L'E - end - nodes, weights = gausslegendre(N) - b, a = dt, 0 - @. nodes = (b - a) / 2 * nodes + (a + b) / 2 - @. weights = (b - a) / 2 * weights - @simd ivdep for i in 1:N - R[(i-1)*d+1:i*d, 1:D] .= sqrt(weights[i]) .* chol_integrand(nodes[i]) - end - - M = R'R |> Symmetric - chol = cholesky!(M, check=false) - Qh_R = if issuccess(chol) - chol.U |> Matrix - else - qr!(R).R |> Matrix - end - - return Ah, Qh_R -end diff --git a/src/projection.jl b/src/projection.jl index 29097b253..ba08e65e0 100644 --- a/src/projection.jl +++ b/src/projection.jl @@ -30,6 +30,17 @@ function projection(C::IsometricKroneckerCovariance{elType}) where {elType} return Proj end +function projection(C::BlockDiagonalCovariance{elType}) where {elType} + Proj(deriv) = begin + e_i = zeros(elType, C.q + 1, 1) + if deriv <= C.q + e_i[deriv+1] = 1 + end + return BlockDiag([copy(e_i)' for _ in 1:C.d]) + end + return Proj +end + function solution_space_projection(C::CovarianceStructure, is_secondorder_ode) Proj = projection(C) if is_secondorder_ode @@ -47,6 +58,14 @@ function solution_space_projection(C::IsometricKroneckerCovariance, is_secondord return Proj(0) end end +function solution_space_projection(C::BlockDiagonalCovariance, is_secondorder_ode) + Proj = projection(C) + if is_secondorder_ode + error("Not yet implemented!") + else + return Proj(0) + end +end struct KroneckerSecondOrderODESolutionProjector{T,FAC,M,M2} <: AbstractMatrix{T} covariance_structure::FAC diff --git a/test/Project.toml b/test/Project.toml index f0fff40ee..4705022bd 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,5 +1,6 @@ [deps] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0" DiffEqDevTools = "f3b72e0c-5b89-59e1-b016-84e28bfd966d" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" @@ -24,4 +25,3 @@ TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" [compat] Aqua = "0.8.2" -DiffEqDevTools = "2.44.1" diff --git a/test/autodiff.jl b/test/autodiff.jl index de5dbef83..235478b23 100644 --- a/test/autodiff.jl +++ b/test/autodiff.jl @@ -9,48 +9,49 @@ using ForwardDiff import ODEProblemLibrary: prob_ode_fitzhughnagumo -const _prob = prob_ode_fitzhughnagumo -const prob = ODEProblem(modelingtoolkitize(_prob), _prob.u0, _prob.tspan, jac=true) +@testset "solver: $ALG" for ALG in (EK0, EK1, DiagonalEK1) + _prob = prob_ode_fitzhughnagumo + prob = ODEProblem(modelingtoolkitize(_prob), _prob.u0, _prob.tspan, jac=true) + function param_to_loss(p) + sol = solve( + remake(prob, p=p), + ALG(order=3, smooth=false), + sensealg=SensitivityADPassThrough(), + abstol=1e-3, + reltol=1e-2, + save_everystep=false, + dense=false, + ) + return norm(sol.u[end]) # Dummy loss + end + function startval_to_loss(u0) + sol = solve( + remake(prob, u0=u0), + ALG(order=3, smooth=false), + sensealg=SensitivityADPassThrough(), + abstol=1e-3, + reltol=1e-2, + save_everystep=false, + dense=false, + ) + return norm(sol.u[end]) # Dummy loss + end -function param_to_loss(p) - sol = solve( - remake(prob, p=p), - EK1(order=3, smooth=false), - sensealg=SensitivityADPassThrough(), - abstol=1e-3, - reltol=1e-2, - save_everystep=false, - dense=false, - ) - return norm(sol.u[end]) # Dummy loss -end -function startval_to_loss(u0) - sol = solve( - remake(prob, u0=u0), - EK1(order=3, smooth=false), - sensealg=SensitivityADPassThrough(), - abstol=1e-3, - reltol=1e-2, - save_everystep=false, - dense=false, - ) - return norm(sol.u[end]) # Dummy loss -end + dldp = FiniteDiff.finite_difference_gradient(param_to_loss, prob.p) + dldu0 = FiniteDiff.finite_difference_gradient(startval_to_loss, prob.u0) -const dldp = FiniteDiff.finite_difference_gradient(param_to_loss, prob.p) -const dldu0 = FiniteDiff.finite_difference_gradient(startval_to_loss, prob.u0) + @testset "ForwardDiff.jl" begin + @test ForwardDiff.gradient(param_to_loss, prob.p) ≈ dldp rtol = 1e-3 + @test ForwardDiff.gradient(startval_to_loss, prob.u0) ≈ dldu0 rtol = 5e-3 + end -@testset "ForwardDiff.jl" begin - @test ForwardDiff.gradient(param_to_loss, prob.p) ≈ dldp rtol = 1e-4 - @test ForwardDiff.gradient(startval_to_loss, prob.u0) ≈ dldu0 rtol = 5e-4 -end + # @testset "ReverseDiff.jl" begin + # @test_broken ReverseDiff.gradient(param_to_loss, prob.p) ≈ dldp + # @test_broken ReverseDiff.gradient(startval_to_loss, prob.u0) ≈ dldu0 + # end -# @testset "ReverseDiff.jl" begin -# @test_broken ReverseDiff.gradient(param_to_loss, prob.p) ≈ dldp -# @test_broken ReverseDiff.gradient(startval_to_loss, prob.u0) ≈ dldu0 -# end - -# @testset "Zygote.jl" begin -# @test_broken Zygote.gradient(param_to_loss, prob.p) ≈ dldp -# @test_broken Zygote.gradient(startval_to_loss, prob.u0) ≈ dldu0 -# end + # @testset "Zygote.jl" begin + # @test_broken Zygote.gradient(param_to_loss, prob.p) ≈ dldp + # @test_broken Zygote.gradient(startval_to_loss, prob.u0) ≈ dldu0 + # end +end diff --git a/test/complexity.jl b/test/complexity.jl index 31482dea5..a87bbc471 100644 --- a/test/complexity.jl +++ b/test/complexity.jl @@ -7,98 +7,86 @@ using Test, SafeTestsets @testset "Scaling with ODE dimension" begin f(du, u, p, t) = mul!(du, -0.9I, u) + jac(J, u, p, t) = @simd ivdep for i in 1:size(J, 1) + J[i, i] = -0.9 + end tspan = (0.0, 1.0) prob = ODEProblem(f, ones(1), tspan) NUMRUNS = 20 - @testset "Order 1 + perfect init + no smoothing" begin - time_dim(d; Alg) = begin - _prob = remake(prob, u0=ones(d)) - tmin = Inf - for _ in 1:NUMRUNS - integ = init(_prob, - Alg( - smooth=false, - order=1, - initialization=ClassicSolverInit(), - ), - dense=false, save_everystep=false, - adaptive=false, dt=1e-2, - ) - t = @elapsed solve!(integ) - tmin = min(tmin, t) - end - return tmin + _timer(d, alg; kwargs...) = begin + _prob = remake( + prob, + u0=ones(d), + f=ODEFunction(f; jac=jac, jac_prototype=Diagonal(ones(d))), + ) + tmin = Inf + for _ in 1:NUMRUNS + integ = init(_prob, alg; adaptive=false, dt=1e-2, kwargs...) + t = @elapsed solve!(integ) + tmin = min(tmin, t) end + return tmin + end - dims_ek0 = 2 .^ (8:15) - times_ek0 = [time_dim(d; Alg=EK0) for d in dims_ek0] - dims_ek1 = 2 .^ (2:6) - times_ek1 = [time_dim(d; Alg=EK1) for d in dims_ek1] + @testset "Order 1 + perfect init + no smoothing" begin + t(d, Alg) = _timer( + d, Alg(smooth=false, order=1, initialization=ClassicSolverInit()); + dense=false, save_everystep=false, + ) + dims_ek0 = 2 .^ (8:15) + times_ek0 = [t(d, EK0) for d in dims_ek0] lr_ek0 = linregress(log.(dims_ek0), log.(times_ek0)) - @test_skip slope(lr_ek0)[1] ≈ 1 atol = 0.1 - @test 0.5 < slope(lr_ek0)[1] < 1.3 + @test slope(lr_ek0)[1] ≈ 1 atol = 1 + dims_ek1 = 2 .^ (3:6) + times_ek1 = [t(d, EK1) for d in dims_ek1] lr_ek1 = linregress(log.(dims_ek1), log.(times_ek1)) - @test_skip slope(lr_ek1)[1] ≈ 2 atol = 0.2 - # This is what we would actually expect, not sure what's going wrong: - @test_skip slope(lr_ek1)[1] ≈ 3 atol = 0.1 + @test slope(lr_ek1)[1] ≈ 3 atol = 1 + + dims_dek1 = 2 .^ (4:10) + times_dek1 = [t(d, DiagonalEK1) for d in dims_dek1] + lr_dek1 = linregress(log.(dims_dek1), log.(times_dek1)) + @test slope(lr_dek1)[1] ≈ 1 atol = 1 end @testset "Order 3 + Taylor-init + no smoothing" begin - time_dim(d; Alg) = begin - _prob = remake(prob, u0=ones(d)) - tmin = Inf - for _ in 1:NUMRUNS - integ = init(_prob, Alg(smooth=false), - dense=false, save_everystep=false, - adaptive=false, dt=1e-2) - t = @elapsed solve!(integ) - tmin = min(tmin, t) - end - return tmin - end + t(d, Alg) = _timer(d, Alg(smooth=false); dense=false, save_everystep=false) dims_ek0 = 2 .^ (8:15) - times_ek0 = [time_dim(d; Alg=EK0) for d in dims_ek0] - dims_ek1 = 2 .^ (2:5) - times_ek1 = [time_dim(d; Alg=EK1) for d in dims_ek1] - + times_ek0 = [t(d, EK0) for d in dims_ek0] lr_ek0 = linregress(log.(dims_ek0), log.(times_ek0)) - @test_skip slope(lr_ek0)[1] ≈ 1 atol = 0.1 - @test 0.5 < slope(lr_ek0)[1] < 1.3 + @test slope(lr_ek0)[1] ≈ 1 atol = 1 + dims_ek1 = 2 .^ (3:6) + times_ek1 = [t(d, EK1) for d in dims_ek1] lr_ek1 = linregress(log.(dims_ek1), log.(times_ek1)) - @test_skip slope(lr_ek1)[1] ≈ 2 atol = 0.5 - # This is what we would actually expect, not sure what's going wrong: - @test_skip slope(lr_ek1)[1] ≈ 3 atol = 0.1 + @test slope(lr_ek1)[1] ≈ 3 atol = 1 + + dims_dek1 = 2 .^ (4:10) + times_dek1 = [t(d, DiagonalEK1) for d in dims_dek1] + lr_dek1 = linregress(log.(dims_dek1), log.(times_dek1)) + @test slope(lr_dek1)[1] ≈ 1 atol = 1 end @testset "Order 3 with smoothing and everyting" begin - time_dim(d; Alg) = begin - _prob = remake(prob, u0=ones(d)) - tmin = Inf - for _ in 1:NUMRUNS - integ = init(_prob, Alg(), adaptive=false, dt=1e-2) - t = @elapsed solve!(integ) - tmin = min(tmin, t) - end - return tmin - end + t(d, Alg) = _timer(d, Alg()) dims_ek0 = 2 .^ (8:13) - times_ek0 = [time_dim(d; Alg=EK0) for d in dims_ek0] - dims_ek1 = 2 .^ (1:4) - times_ek1 = [time_dim(d; Alg=EK1) for d in dims_ek1] - + times_ek0 = [t(d, EK0) for d in dims_ek0] lr_ek0 = linregress(log.(dims_ek0), log.(times_ek0)) - @test 0.5 < slope(lr_ek0)[1] < 1.3 + @test slope(lr_ek0)[1] ≈ 1 atol = 1 + dims_ek1 = 2 .^ (3:6) + times_ek1 = [t(d, EK1) for d in dims_ek1] lr_ek1 = linregress(log.(dims_ek1), log.(times_ek1)) - @test_skip slope(lr_ek1)[1] ≈ 2 atol = 0.2 - # This is what we would actually expect, not sure what's going wrong: - @test_skip slope(lr_ek1)[1] ≈ 3 atol = 0.1 + @test slope(lr_ek1)[1] ≈ 3 atol = 1 + + dims_dek1 = 2 .^ (4:10) + times_dek1 = [t(d, DiagonalEK1) for d in dims_dek1] + lr_dek1 = linregress(log.(dims_dek1), log.(times_dek1)) + @test slope(lr_dek1)[1] ≈ 1 atol = 1 end end diff --git a/test/core/blockdiagonals.jl b/test/core/blockdiagonals.jl new file mode 100644 index 000000000..c0969571b --- /dev/null +++ b/test/core/blockdiagonals.jl @@ -0,0 +1,85 @@ +using ProbNumDiffEq +import ProbNumDiffEq as PNDE +import ProbNumDiffEq: BlockDiag, _matmul! +using LinearAlgebra +using BlockDiagonals +using Test + +d1, d2 = 2, 3 +D = d1 * d2 +@testset "T=$T" for T in (Float64, BigFloat) + A = BlockDiag([randn(T, d1, d1) for _ in 1:d2]) + B = BlockDiag([randn(T, d1, d1) for _ in 1:d2]) + C = BlockDiag([randn(T, d1, d1) for _ in 1:d2]) + alpha = rand(T) + beta = rand(T) + + AM, BM, CM = @test_nowarn Matrix.((A, B, C)) + + @test Matrix(BlockDiagonal(A)) == AM + @test Matrix(BlockDiagonal(B)) == BM + @test Matrix(BlockDiagonal(C)) == CM + + _A = @test_nowarn copy(A) + @test _A isa BlockDiag + + _B = @test_nowarn copy!(_A, B) + @test _B === _A + @test _B == B + + _A = @test_nowarn similar(A) + @test _A isa BlockDiag + @test size(_A) == size(A) + + _Z = @test_nowarn zero(A) + @test _Z isa BlockDiag + @test size(_Z) == size(A) + @test all(_Z .== 0) + + function tttm(M) # quick type test and to matrix + @test M isa BlockDiag + return Matrix(M) + end + + @test tttm(mul!(C, A, B)) ≈ mul!(CM, AM, BM) + @test tttm(mul!(C, A', B)) ≈ mul!(CM, AM', BM) + @test tttm(mul!(C, A, B')) ≈ mul!(CM, AM, BM') + @test tttm(_matmul!(C, A, B)) ≈ _matmul!(CM, AM, BM) + @test tttm(_matmul!(C, A', B)) ≈ _matmul!(CM, AM', BM) + @test tttm(_matmul!(C, A, B')) ≈ _matmul!(CM, AM, BM') + + @test tttm(mul!(C, A, B, alpha, beta)) ≈ mul!(CM, AM, BM, alpha, beta) + @test tttm(_matmul!(C, A, B, alpha, beta)) ≈ _matmul!(CM, AM, BM, alpha, beta) + + @test tttm(A * B) ≈ AM * BM + @test tttm(A' * B) ≈ AM' * BM + @test tttm(A * B') ≈ AM * BM' + + @test tttm(A * alpha) ≈ AM * alpha + @test tttm(alpha * A) ≈ alpha * AM + @test tttm(A * (alpha * I)) ≈ AM * alpha + @test tttm((alpha * I) * A) ≈ alpha * AM + @test tttm(rmul!(copy(A), alpha)) ≈ alpha * AM + @test tttm(mul!(_A, alpha, A)) ≈ alpha * AM + @test tttm(mul!(_A, A, alpha)) ≈ alpha * AM + @test tttm(_matmul!(_A, alpha, A)) ≈ alpha * AM + @test tttm(_matmul!(_A, A, alpha)) ≈ alpha * AM + + @test tttm((alpha * I(D)) * A) ≈ alpha * AM + @test tttm(A * (alpha * I(D))) ≈ AM * alpha + @test tttm(mul!(_A, A, alpha * I(D))) ≈ alpha * AM + @test tttm(mul!(_A, alpha * I(D), A)) ≈ alpha * AM + @test tttm(_matmul!(_A, A, alpha * I(D))) ≈ alpha * AM + @test tttm(_matmul!(_A, alpha * I(D), A)) ≈ alpha * AM + + @test_throws ErrorException view(A, 1:2, 1:2) + + tttm(copy!(copy(A), Diagonal(A))) + + @test tttm(A + A) ≈ AM + AM + @test tttm(A - A) ≈ AM - AM + + _A = copy(A) + @test tttm(PNDE.add!(_A, A)) == AM + AM + @test Matrix(_A) == AM + AM +end diff --git a/test/core/diffusions.jl b/test/core/diffusions.jl new file mode 100644 index 000000000..2d7fb59de --- /dev/null +++ b/test/core/diffusions.jl @@ -0,0 +1,59 @@ +using ProbNumDiffEq +import ProbNumDiffEq as PNDE +using Test +using LinearAlgebra +using FillArrays + +d, q = 2, 3 +T = Float64 + +@testset "$diffusionmodel" for diffusionmodel in ( + DynamicDiffusion(), + DynamicMVDiffusion(), + FixedDiffusion(), + FixedDiffusion(calibrate=false), + FixedMVDiffusion(), + FixedMVDiffusion(; initial_diffusion=rand(d)), + FixedMVDiffusion(; initial_diffusion=Diagonal(rand(d))), + FixedMVDiffusion(; initial_diffusion=Diagonal(rand(d)), calibrate=false), +) + + # Test the initial diffusion + diffusion = PNDE.initial_diffusion(diffusionmodel, d, q, T) + @assert size(diffusion) == (d, d) + @assert diffusion isa Diagonal + if !(diffusionmodel isa FixedMVDiffusion || diffusionmodel isa DynamicMVDiffusion) + @assert diffusion isa Diagonal{T,<:Fill} + end + + # Test applying the diffusion + _, Q = PNDE.discretize(PNDE.IWP{T}(d, q), 0.1) + Qmat = PSDMatrix(Matrix(Q.R)) + _diffusion = rand() * diffusion + @testset "$FAC" for FAC in ( + PNDE.DenseCovariance{T}(d, q), + PNDE.BlockDiagonalCovariance{T}(d, q), + PNDE.IsometricKroneckerCovariance{T}(d, q), + ) + if diffusion isa Diagonal{T,<:Vector} && FAC isa PNDE.IsometricKroneckerCovariance + continue + end + + _Q = PNDE.to_factorized_matrix(FAC, Q) + Qdiff = @test_nowarn PNDE.apply_diffusion(_Q, _diffusion) + Qmatdiff = @test_nowarn PNDE.apply_diffusion(Qmat, _diffusion) + @test Qdiff == Qmatdiff + + Qdiff = @test_nowarn PNDE.apply_diffusion!(copy(_Q), _diffusion) + @test Qdiff == Qmatdiff + + Qdiff = @test_nowarn PNDE.apply_diffusion!(copy(_Q), _Q, _diffusion) + @test Qdiff == Qmatdiff + end + + @testset "Calibration" begin + # MLE + # At their core, they all just compute z' * inv(S) * z + # and then do something with the result + end +end diff --git a/test/core/fast_linalg.jl b/test/core/fast_linalg.jl new file mode 100644 index 000000000..2363f553f --- /dev/null +++ b/test/core/fast_linalg.jl @@ -0,0 +1,52 @@ +using ProbNumDiffEq +import ProbNumDiffEq: _matmul! +using LinearAlgebra +using Test + +@testset "T=$T" for T in (Float64, BigFloat) + A = rand(T, 2, 3) + B = rand(T, 3, 4) + C = rand(T, 2, 4) + alpha = rand(T) + beta = rand(T) + + @test _matmul!(C, A, B) == mul!(C, A, B) + @test _matmul!(C, A, B, alpha, beta) == mul!(C, A, B, alpha, beta) + + _B = copy(B) + @test _matmul!(_B, alpha, B) == mul!(_B, alpha, B) + @test _matmul!(_B, B, beta) == mul!(_B, B, beta) + + # Diagonals + D = Diagonal(rand(T, size(B, 1))) + @test _matmul!(_B, D, B) == mul!(_B, D, B) + @test _matmul!(_B, D, B, alpha, beta) == mul!(_B, D, B, alpha, beta) + D = Diagonal(rand(T, size(B, 2))) + @test _matmul!(_B, B, D) == mul!(_B, B, D) + @test _matmul!(_B, B, D, alpha, beta) == mul!(_B, B, D, alpha, beta) + CD, D1, D2 = rand(T, 3, 3), Diagonal(rand(T, 3)), Diagonal(rand(T, 3)) + @test _matmul!(CD, D1, D2) == _matmul!(CD, D1, D2) + + # Triangulars + ASQ, BSQ, CSQ = rand(T, 2, 2), rand(T, 2, 2), rand(T, 2, 2) + ALT, AUT = LowerTriangular(ASQ), UpperTriangular(ASQ) + BLT, BUT = LowerTriangular(BSQ), UpperTriangular(BSQ) + @test _matmul!(CSQ, ALT, BSQ) == mul!(CSQ, ALT, BSQ) + @test _matmul!(CSQ, AUT, BSQ) == mul!(CSQ, AUT, BSQ) + @test _matmul!(CSQ, ASQ, BLT) == mul!(CSQ, ASQ, BLT) + @test _matmul!(CSQ, ASQ, BUT) == mul!(CSQ, ASQ, BUT) + @test _matmul!(CSQ, ALT, BUT) == mul!(CSQ, ALT, BUT) + @test _matmul!(CSQ, AUT, BLT) == mul!(CSQ, AUT, BLT) + @test _matmul!(CSQ, ALT, BLT) == mul!(CSQ, ALT, BLT) + @test _matmul!(CSQ, AUT, BUT) == mul!(CSQ, AUT, BUT) + + # Adjoints + AT = Matrix(A')' + @test _matmul!(C, AT, B) == mul!(C, AT, B) + BT = Matrix(B')' + @test _matmul!(C, A, BT) == mul!(C, A, BT) + + # Vectors + CV, BV = rand(T, 2), rand(T, 3) + @test _matmul!(CV, A, BV) == mul!(CV, A, BV) +end diff --git a/test/core/filtering.jl b/test/core/filtering.jl index d570e2eed..9da066b22 100644 --- a/test/core/filtering.jl +++ b/test/core/filtering.jl @@ -5,53 +5,53 @@ Check the correctness of the filtering implementations vs. basic readable math c using Test using ProbNumDiffEq using LinearAlgebra -import ProbNumDiffEq: IsometricKroneckerProduct +import ProbNumDiffEq: IsometricKroneckerProduct, BlockDiag import ProbNumDiffEq as PNDE +using BlockDiagonals +using FillArrays +import ProbNumDiffEq.GaussianDistributions: logpdf @testset "PREDICT" begin # Setup - d = 5 - m = rand(d) - P_R = Matrix(UpperTriangular(rand(d, d))) - P = P_R'P_R + d = 2 + q = 2 + D = d * (q + 1) + m = rand(D) - A = rand(d, d) - Q_R = Matrix(UpperTriangular(rand(d, d))) - Q = Q_R'Q_R + _P_R = IsometricKroneckerProduct(d, Matrix(UpperTriangular(rand(q + 1, q + 1)))) + _P = _P_R'_P_R + PM = Matrix(_P) - # PREDICT - m_p = A * m - P_p = A * P * A' + Q + _A = IsometricKroneckerProduct(d, rand(q + 1, q + 1)) + AM = Matrix(_A) - x_curr = Gaussian(m, P) - x_out = copy(x_curr) + _Q_R = IsometricKroneckerProduct(d, Matrix(UpperTriangular(rand(q + 1, q + 1)))) + _Q = _Q_R'_Q_R + QM = Matrix(_Q) - C_DxD = zeros(d, d) - C_2DxD = zeros(2d, d) - C_3DxD = zeros(3d, d) + # PREDICT + m_p = AM * m + P_p = AM * PM * AM' + QM - _fstr(F) = F ? "Kronecker" : "None" - @testset "Factorization: $(_fstr(KRONECKER))" for KRONECKER in (false, true) - if KRONECKER - K = 2 - m = kron(ones(K), m) - P_R = IsometricKroneckerProduct(K, P_R) - P = P_R'P_R + @testset "Factorization: $_FAC" for _FAC in ( + PNDE.DenseCovariance, + PNDE.BlockDiagonalCovariance, + PNDE.IsometricKroneckerCovariance, + ) + FAC = _FAC{Float64}(d, q) - A = IsometricKroneckerProduct(K, A) - Q_R = IsometricKroneckerProduct(K, Q_R) - Q = Q_R'Q_R + P_R = PNDE.to_factorized_matrix(FAC, _P_R) + P = P_R'P_R + A = PNDE.to_factorized_matrix(FAC, _A) + Q_R = PNDE.to_factorized_matrix(FAC, _Q_R) + Q = Q_R'Q_R - m_p = A * m - P_p = A * P * A' + Q + x_curr = Gaussian(m, P) + x_out = copy(x_curr) - x_curr = Gaussian(m, P) - x_out = copy(x_curr) - - C_DxD = IsometricKroneckerProduct(K, C_DxD) - C_2DxD = IsometricKroneckerProduct(K, C_2DxD) - C_3DxD = IsometricKroneckerProduct(K, C_3DxD) - end + C_DxD = PNDE.factorized_zeros(FAC, D, D) + C_2DxD = PNDE.factorized_zeros(FAC, 2D, D) + C_3DxD = PNDE.factorized_zeros(FAC, 3D, D) @testset "predict" begin x_out = ProbNumDiffEq.predict(x_curr, A, Q) @@ -68,6 +68,28 @@ import ProbNumDiffEq as PNDE @test P_p ≈ Matrix(x_out.Σ) end + @testset "predict! with PSDMatrix and diffusion" begin + for diffusion in (rand(), rand() * Eye(d), rand() * I(d), Diagonal(rand(d))) + if _FAC == PNDE.IsometricKroneckerCovariance && + !( + diffusion isa Number || + diffusion isa Diagonal{<:Number,<:FillArrays.Fill} + ) + continue + end + _diffusions = diffusion isa Number ? diffusion * Ones(d) : diffusion.diag + + QM_diff = Matrix(BlockDiagonal([σ² * _Q.B for σ² in _diffusions])) + P_p_diff = AM * PM * AM' + QM_diff + + x_curr = Gaussian(m, PSDMatrix(P_R)) + x_out = copy(x_curr) + Q_SR = PSDMatrix(Q_R) + ProbNumDiffEq.predict!(x_out, x_curr, A, Q_SR, C_DxD, C_2DxD, diffusion) + @test P_p_diff ≈ Matrix(x_out.Σ) + end + end + @testset "predict! with zero diffusion" begin x_curr = Gaussian(m, PSDMatrix(P_R)) x_out = copy(x_curr) @@ -81,8 +103,10 @@ import ProbNumDiffEq as PNDE x_curr = Gaussian(m, PSDMatrix(P_R)) x_out = copy(x_curr) # marginalize! needs tall square-roots: - Q_SR = if KRONECKER + Q_SR = if Q_R isa IsometricKroneckerProduct PSDMatrix(IsometricKroneckerProduct(Q_R.ldim, [Q_R.B; zero(Q_R.B)])) + elseif Q_R isa BlockDiag + PSDMatrix(BlockDiag([[B; zero(B)] for B in Q_R.blocks])) else PSDMatrix([Q_R; zero(Q_R)]) end @@ -98,60 +122,67 @@ end @testset "UPDATE" begin # Setup d = 5 + o = 1 + m_p = rand(d) - P_p_R = Matrix(UpperTriangular(rand(d, d))) - P_p = P_p_R'P_p_R + _P_p_R = IsometricKroneckerProduct(o, Matrix(UpperTriangular(rand(d, d)))) + _P_p = _P_p_R'_P_p_R + P_p_M = Matrix(_P_p) # Measure - o = 1 _HB = rand(1, d) - H = kron(I(o), _HB) + _H = IsometricKroneckerProduct(o, _HB) + HM = Matrix(_H) R = zeros(o, o) z_data = zeros(o) - z = H * m_p - SR = P_p_R * H' - S = Symmetric(SR'SR) - - # UPDATE - S_inv = inv(S) - K = P_p * H' * S_inv - m = m_p + K * (z_data .- z) - P = P_p - K * S * K' - - x_pred = Gaussian(m_p, P_p) - x_out = copy(x_pred) - measurement = Gaussian(z, S) + z = _H * m_p + _SR = _P_p_R * _H' + _S = _SR'_SR + SM = Matrix(_S) - C_dxd = zeros(o, o) - C_d = zeros(o) - C_Dxd = zeros(d, o) - C_DxD = zeros(d, d) - C_2DxD = zeros(2d, d) - C_3DxD = zeros(3d, d) + LL = logpdf(Gaussian(z, SM), z_data) - _fstr(F) = F ? "Kronecker" : "None" - @testset "Factorization: $(_fstr(KRONECKER))" for KRONECKER in (false, true) - if KRONECKER - P_p_R = IsometricKroneckerProduct(1, P_p_R) - P_p = P_p_R'P_p_R - - H = IsometricKroneckerProduct(1, _HB) - R = zeros(o, o) - - SR = IsometricKroneckerProduct(1, SR) - S = SR'SR - - x_pred = Gaussian(m_p, P_p) - x_out = copy(x_pred) - measurement = Gaussian(z, S) - - C_dxd = IsometricKroneckerProduct(1, C_dxd) - C_Dxd = IsometricKroneckerProduct(1, C_Dxd) - C_DxD = IsometricKroneckerProduct(1, C_DxD) - C_2DxD = IsometricKroneckerProduct(1, C_2DxD) - C_3DxD = IsometricKroneckerProduct(1, C_3DxD) - end + # UPDATE + KM = P_p_M * HM' / SM + m = m_p + KM * (z_data .- z) + P = P_p_M - KM * SM * KM' + + _R_R = rand(o, o) + _R = _R_R'_R_R + _SR_noisy = qr([_P_p_R * _H'; _R_R]).R |> Matrix + _S_noisy = _SR_noisy'_SR_noisy + SM_noisy = Matrix(_S_noisy) + + KM_noisy = P_p_M * HM' / SM_noisy + m_noisy = m_p + KM_noisy * (z_data .- z) + P_noisy = P_p_M - KM_noisy * SM_noisy * KM_noisy' + + @testset "Factorization: $_FAC" for _FAC in ( + PNDE.DenseCovariance, + PNDE.BlockDiagonalCovariance, + PNDE.IsometricKroneckerCovariance, + ) + FAC = _FAC{Float64}(o, d) + + C_dxd = PNDE.factorized_zeros(FAC, o, o) + C_d = zeros(o) + C_Dxd = PNDE.factorized_zeros(FAC, d, o) + C_DxD = PNDE.factorized_zeros(FAC, d, d) + C_2DxD = PNDE.factorized_zeros(FAC, 2d, d) + C_3DxD = PNDE.factorized_zeros(FAC, 3d, d) + + P_p_R = PNDE.to_factorized_matrix(FAC, _P_p_R) + P_p = P_p_R'P_p_R + + H = PNDE.to_factorized_matrix(FAC, _H) + + SR = PNDE.to_factorized_matrix(FAC, _SR) + S = SR'SR + + x_pred = Gaussian(m_p, P_p) + x_out = copy(x_pred) + measurement = Gaussian(z, S) @testset "update" begin x_out = ProbNumDiffEq.update(x_pred, measurement, H) @@ -170,7 +201,7 @@ end z_cache = C_d x_pred = Gaussian(x_pred.μ, PSDMatrix(P_p_R)) x_out = copy(x_pred) - ProbNumDiffEq.update!( + _, ll = ProbNumDiffEq.update!( x_out, x_pred, msmnt, @@ -183,6 +214,7 @@ end ) @test m ≈ x_out.μ @test P ≈ Matrix(x_out.Σ) + @test ll ≈ LL end @testset "Zero predicted covariance" begin K_cache = copy(C_Dxd) @@ -230,6 +262,8 @@ end SR = qr([P_p_R * H'; R_R]).R |> Matrix S = Symmetric(SR'SR) + LL = logpdf(Gaussian(z, S), z_data) + # UPDATE S_inv = inv(S) K = P_p * H' * S_inv @@ -288,7 +322,7 @@ end z_cache = C_d x_pred = Gaussian(x_pred.μ, PSDMatrix(P_p_R)) x_out = copy(x_pred) - ProbNumDiffEq.update!( + _, ll = ProbNumDiffEq.update!( x_out, x_pred, msmnt, @@ -302,6 +336,7 @@ end ) @test m ≈ x_out.μ @test P ≈ Matrix(x_out.Σ) + @test ll ≈ LL end @testset "Zero predicted covariance" begin K_cache = copy(C_Dxd) @@ -334,57 +369,63 @@ end @testset "SMOOTH" begin # Setup d = 5 - m, m_s = rand(d), rand(d) - P_R, P_s_R = Matrix(UpperTriangular(rand(d, d))), Matrix(UpperTriangular(rand(d, d))) - P, P_s = P_R'P_R, P_s_R'P_s_R + q = 2 + D = d * (q + 1) + + m, m_s = rand(D), rand(D) + _P_R = IsometricKroneckerProduct(d, Matrix(UpperTriangular(rand(q + 1, q + 1)))) + _P_s_R = IsometricKroneckerProduct(d, Matrix(UpperTriangular(rand(q + 1, q + 1)))) + _P, _P_s = _P_R'_P_R, _P_s_R'_P_s_R + PM, P_sM = Matrix(_P), Matrix(_P_s) - A = rand(d, d) - Q_R = Matrix(UpperTriangular(rand(d, d))) + I - Q = Q_R'Q_R - Q_SR = PSDMatrix(Q_R) + _A = IsometricKroneckerProduct(d, rand(q + 1, q + 1)) + AM = Matrix(_A) + _Q_R = IsometricKroneckerProduct(d, Matrix(UpperTriangular(rand(q + 1, q + 1)) + I)) + _Q = _Q_R'_Q_R + _Q_SR = PSDMatrix(_Q_R) # PREDICT first - m_p = A * m - P_p_R = qr([P_R * A'; Q_R]).R |> Matrix - P_p = A * P * A' + Q - @assert P_p ≈ P_p_R'P_p_R + m_p = AM * m + _P_p_R = IsometricKroneckerProduct(d, qr([_P_R.B * _A.B'; _Q_R.B]).R |> Matrix) + _P_p = _A * _P * _A' + _Q + @assert _P_p ≈ _P_p_R'_P_p_R + P_pM = Matrix(_P_p) # SMOOTH - G = P * A' * inv(P_p) + G = _P * _A' * inv(_P_p) |> Matrix m_smoothed = m + G * (m_s - m_p) - P_smoothed = P + G * (P_s - P_p) * G' + P_smoothed = PM + G * (P_sM - P_pM) * G' - x_curr = Gaussian(m, P) - x_next = Gaussian(m_s, P_s) x_smoothed = Gaussian(m_smoothed, P_smoothed) - _fstr(F) = F ? "Kronecker" : "None" - @testset "Factorization: $(_fstr(KRONECKER))" for KRONECKER in (false, true) - K = 2 - if KRONECKER - P_R = IsometricKroneckerProduct(K, P_R) - P = P_R'P_R + @testset "Factorization: $_FAC" for _FAC in ( + PNDE.DenseCovariance, + PNDE.BlockDiagonalCovariance, + PNDE.IsometricKroneckerCovariance, + ) + FAC = _FAC{Float64}(d, q) - P_s_R = IsometricKroneckerProduct(K, P_s_R) - P_s = P_s_R'P_s_R - - P_p_R = IsometricKroneckerProduct(K, P_p_R) - P_p = P_p_R'P_p_R + P_R = PNDE.to_factorized_matrix(FAC, _P_R) + P = P_R'P_R + P_s_R = PNDE.to_factorized_matrix(FAC, _P_s_R) + P_s = P_s_R'P_s_R + P_p_R = PNDE.to_factorized_matrix(FAC, _P_p_R) + P_p = P_p_R'P_p_R - m, m_s, m_p = kron(ones(K), m), kron(ones(K), m_s), kron(ones(K), m_p) + x_curr = Gaussian(m, P) + x_next = Gaussian(m_s, P_s) - A = IsometricKroneckerProduct(K, A) - Q_R = IsometricKroneckerProduct(K, Q_R) - Q = Q_R'Q_R - Q_SR = PSDMatrix(Q_R) + A = PNDE.to_factorized_matrix(FAC, _A) + Q_R = PNDE.to_factorized_matrix(FAC, _Q_R) + Q = Q_R'Q_R + Q_SR = PSDMatrix(Q_R) - x_curr = Gaussian(m, P) - x_next = Gaussian(m_s, P_s) + x_curr = Gaussian(m, P) + x_next = Gaussian(m_s, P_s) - m_smoothed = kron(ones(K), m_smoothed) - P_smoothed = IsometricKroneckerProduct(K, P_smoothed) - x_smoothed = Gaussian(m_smoothed, P_smoothed) - end + C_DxD = PNDE.factorized_zeros(FAC, D, D) + C_2DxD = PNDE.factorized_zeros(FAC, 2D, D) + C_3DxD = PNDE.factorized_zeros(FAC, 3D, D) @testset "smooth" begin x_out, _ = ProbNumDiffEq.smooth(x_curr, x_next, A, Q) @@ -401,22 +442,12 @@ end @testset "smooth via backward kernels" begin K_forward = ProbNumDiffEq.AffineNormalKernel(copy(A), copy(Q_SR)) K_backward = ProbNumDiffEq.AffineNormalKernel( - copy(A), copy(m_p), - if KRONECKER - PSDMatrix(IsometricKroneckerProduct(K, zeros(2d, d))) - else - PSDMatrix(zeros(2d, d)) - end) + copy(A), copy(m_p), PSDMatrix(copy(C_2DxD))) x_curr = Gaussian(m, PSDMatrix(P_R)) |> copy x_next_pred = Gaussian(m_p, PSDMatrix(P_p_R)) |> copy x_next_smoothed = Gaussian(m_s, PSDMatrix(P_s_R)) |> copy - C_DxD = if KRONECKER - IsometricKroneckerProduct(K, zeros(d, d)) - else - zeros(d, d) - end ProbNumDiffEq.compute_backward_kernel!( K_backward, x_next_pred, x_curr, K_forward; C_DxD) @@ -427,11 +458,6 @@ end @test K_backward.b ≈ b @test Matrix(K_backward.C) ≈ Λ - C_3DxD = if KRONECKER - IsometricKroneckerProduct(K, zeros(3d, d)) - else - zeros(3d, d) - end ProbNumDiffEq.marginalize_mean!(x_curr.μ, x_next_smoothed.μ, K_backward) ProbNumDiffEq.marginalize_cov!( x_curr.Σ, @@ -454,6 +480,32 @@ end @test K2.b == K_backward.b @test K2.C == K_backward.C end + + @testset "smooth via backward kernels with diffusion $diffusion" for diffusion in + ( + rand(), rand() * Eye(d), rand() * I(d), Diagonal(rand(d)), + ) + if _FAC == PNDE.IsometricKroneckerCovariance && + !( + diffusion isa Number || + diffusion isa Diagonal{<:Number,<:FillArrays.Fill} + ) + continue + end + _diffusions = + diffusion isa Number ? diffusion * Ones(d) : diffusion.diag + QM_diff = Matrix(BlockDiagonal([σ² * _Q.B for σ² in _diffusions])) + + ProbNumDiffEq.compute_backward_kernel!( + K_backward, x_next_pred, x_curr, K_forward; C_DxD, diffusion) + + G = Matrix(x_curr.Σ) * Matrix(A)' * inv(Matrix(x_next_pred.Σ)) + b = x_curr.μ - G * x_next_pred.μ + Λ = (I - G * AM) * Matrix(x_curr.Σ) * (I - G * AM)' + G * QM_diff * G' + @test K_backward.A ≈ G + @test K_backward.b ≈ b + @test Matrix(K_backward.C) ≈ Λ + end end end end diff --git a/test/core/kronecker.jl b/test/core/kronecker.jl index 982654da7..5e8ad085c 100644 --- a/test/core/kronecker.jl +++ b/test/core/kronecker.jl @@ -14,11 +14,27 @@ q = 2 K2 = PNDE.IsometricKroneckerProduct(d, R2) M2 = Matrix(K2) + # Base + K3 = PNDE.IsometricKroneckerProduct(d, copy(R2)) + M3 = Matrix(K3) + @test similar(K1) isa PNDE.IsometricKroneckerProduct + @test copy(K1) isa PNDE.IsometricKroneckerProduct + @test copy!(K3, K1) isa PNDE.IsometricKroneckerProduct + @test K3 == K1 + @test size(K1) == size(M1) + + function tttm(M) # quick type test and to matrix + @test M isa PNDE.IsometricKroneckerProduct + return Matrix(M) + end + # Matrix-Matrix Operations - @test K1 * K2 ≈ M1 * M2 - @test K1 * K2 isa PNDE.IsometricKroneckerProduct - @test K1 + K2 ≈ M1 + M2 - @test K1 + K2 isa PNDE.IsometricKroneckerProduct + @test tttm(K1 * K2) ≈ M1 * M2 + @test tttm(K1 + K2) ≈ M1 + M2 + + _K1 = copy(K1) + @test tttm(PNDE.add!(_K1, K2)) ≈ M1 + M2 + @test _K1 ≈ M1 + M2 # DimensionMismatch X = PNDE.IsometricKroneckerProduct(d, rand(T, 1, 1)) @@ -31,38 +47,21 @@ q = 2 R4 = rand(T, q + 1) K4 = PNDE.IsometricKroneckerProduct(d, R4) M4 = Matrix(K4) - @test K1 * K4 ≈ M1 * M4 + @test tttm(K1 * K4) ≈ M1 * M4 @test_throws DimensionMismatch K1 + K4 # UniformScaling - @test I + K1 ≈ I + M1 - @test I + K1 isa PNDE.IsometricKroneckerProduct - @test K1 + I ≈ M1 + I - @test K1 + I isa PNDE.IsometricKroneckerProduct - @test I - K1 ≈ I - M1 - @test I - K1 isa PNDE.IsometricKroneckerProduct - @test K1 - I ≈ M1 - I - @test K1 - I isa PNDE.IsometricKroneckerProduct + @test tttm(I + K1) ≈ I + M1 + @test tttm(K1 + I) ≈ M1 + I + @test tttm(I - K1) ≈ I - M1 + @test tttm(K1 - I) ≈ M1 - I # Other LinearAlgebra - @test K1' ≈ M1' - @test K1' isa PNDE.IsometricKroneckerProduct - @test inv(K1) ≈ inv(M1) - @test inv(K1) isa PNDE.IsometricKroneckerProduct + @test tttm(K1') ≈ M1' + @test tttm(inv(K1)) ≈ inv(M1) @test det(K1) ≈ det(M1) - @test K1 / K2 ≈ M1 / M2 - @test K1 / K2 isa PNDE.IsometricKroneckerProduct - @test K1 \ K2 ≈ M1 \ M2 - @test K1 \ K2 isa PNDE.IsometricKroneckerProduct - - # Base - K3 = PNDE.IsometricKroneckerProduct(d, copy(R2)) - M3 = Matrix(K3) - @test similar(K1) isa PNDE.IsometricKroneckerProduct - @test copy(K1) isa PNDE.IsometricKroneckerProduct - @test copy!(K3, K1) isa PNDE.IsometricKroneckerProduct - @test K3 == K1 - @test size(K1) == size(M1) + @test tttm(K1 / K2) ≈ M1 / M2 + @test tttm(K1 \ K2) ≈ M1 \ M2 # Base @test one(K1) isa PNDE.IsometricKroneckerProduct @@ -72,10 +71,13 @@ q = 2 # Matrix-Scalar α = 2.0 - @test α * K1 ≈ α * M1 - @test α * K1 isa PNDE.IsometricKroneckerProduct - @test K1 * α ≈ α * M1 - @test K1 * α isa PNDE.IsometricKroneckerProduct + @test tttm(α * K1) ≈ α * M1 + @test tttm(K1 * α) ≈ α * M1 + _K1 = copy(K1) + @test mul!(_K1, α, K1) == α * K1 + @test mul!(_K1, K1, α) == α * K1 + @test PNDE._matmul!(_K1, K1, α) == α * K1 + @test PNDE._matmul!(_K1, α, K1) == α * K1 # In-place Matrix-Matrix Multiplication β = -0.5 diff --git a/test/core/preconditioning.jl b/test/core/preconditioning.jl index 41502fa6b..27c02cd62 100644 --- a/test/core/preconditioning.jl +++ b/test/core/preconditioning.jl @@ -2,10 +2,6 @@ using Test using LinearAlgebra using ProbNumDiffEq import ProbNumDiffEq as PNDE -import ODEProblemLibrary: prob_ode_lotkavolterra - -prob = prob_ode_lotkavolterra -prob = remake(prob, tspan=(0.0, 10.0)) @testset "Condition numbers of A,Q" begin h = 0.1 * rand() @@ -34,3 +30,22 @@ prob = remake(prob, tspan=(0.0, 10.0)) @test cond(Matrix(Qh)) > cond(Matrix(Qh_p)) @test cond(Matrix(Qh)) > cond(Matrix(Qh_p))^2 end + +@testset "Covariance Factorizations all doing the correct thing" begin + h = rand() + d, q = 2, 3 + + function make_preconditioners(FAC) + P, PI = PNDE.init_preconditioner(FAC{Float64}(d, q)) + PNDE.make_preconditioner!(P, h, d, q) + PNDE.make_preconditioner_inv!(PI, h, d, q) + return P, PI + end + + PK, PIK = make_preconditioners(PNDE.IsometricKroneckerCovariance) + PD, PID = make_preconditioners(PNDE.DenseCovariance) + PB, PIB = make_preconditioners(PNDE.BlockDiagonalCovariance) + + @test PK == PD == PB + @test PIK == PID == PIB +end diff --git a/test/core/priors.jl b/test/core/priors.jl index dac7769f2..688374876 100644 --- a/test/core/priors.jl +++ b/test/core/priors.jl @@ -7,9 +7,9 @@ using FiniteHorizonGramians using Statistics using Plots using SimpleUnPack +using FillArrays h = 0.1 -σ = 0.1 @testset "General prior API" begin for prior in ( @@ -31,11 +31,6 @@ h = 0.1 @test A1 ≈ A3 @test Matrix(Q1) ≈ Q3 - A4, Q4R = PNDE._discretize_sqrt_with_quadraturetrick( - PNDE.LTISDE(Matrix(sde.F), Matrix(sde.L)), h) - @test A1 ≈ A4 - @test Q1.R ≈ Q4R - ts = 0:0.1:1 marginals = @test_nowarn PNDE.marginalize(prior, ts) @test length(marginals) == length(ts) @@ -54,6 +49,8 @@ end @testset "Test IWP (d=2,q=2)" begin d, q = 2, 2 + σ = 0.1 + prior = PNDE.IWP(dim=d, num_derivatives=q) # true sde parameters @@ -122,41 +119,54 @@ end @testset "Test vanilla (ie. non-preconditioned)" begin Ah, Qh = PNDE.discretize(prior, h) - Qh = PNDE.apply_diffusion(Qh, σ^2) - @test AH_22_IBM ≈ Ah - @test QH_22_IBM ≈ Matrix(Qh) + + for Γ in (σ^2, σ^2 * Eye(d)) + @test QH_22_IBM ≈ Matrix(PNDE.apply_diffusion(Qh, Γ)) + end end @testset "Test with preconditioning" begin A, Q = PNDE.preconditioned_discretize(prior) - Qh = PNDE.apply_diffusion(Q, σ^2) - @test AH_22_PRE ≈ Matrix(A) - @test QH_22_PRE ≈ Matrix(Qh) + + for Γ in (σ^2, σ^2 * Eye(d)) + @test QH_22_PRE ≈ Matrix(PNDE.apply_diffusion(Q, Γ)) + end end @testset "Test `make_transition_matrices!`" begin - A, Q, Ah, Qh, P, PI = PNDE.initialize_transition_matrices( - PNDE.DenseCovariance{Float64}(d, q), prior, h) - - @test AH_22_PRE ≈ A - @test QH_22_PRE ≈ Matrix(PNDE.apply_diffusion(Q, σ^2)) - - cache = ( - d=d, - q=q, - A=A, - Q=Q, - P=P, - PI=PI, - Ah=Ah, - Qh=Qh, - ) - - make_transition_matrices!(cache, prior, h) - @test AH_22_IBM ≈ cache.Ah - @test QH_22_IBM ≈ Matrix(PNDE.apply_diffusion(cache.Qh, σ^2)) + for FAC in (PNDE.IsometricKroneckerCovariance, PNDE.BlockDiagonalCovariance) + A, Q, Ah, Qh, P, PI = PNDE.initialize_transition_matrices( + PNDE.BlockDiagonalCovariance{Float64}(d, q), prior, h) + + @test AH_22_PRE ≈ A + + for Γ in (σ^2, σ^2 * Eye(d), σ^2 * I(d)) + @test QH_22_PRE ≈ Matrix(PNDE.apply_diffusion(Q, Γ)) + end + + cache = ( + d=d, + q=q, + A=A, + Q=Q, + P=P, + PI=PI, + Ah=Ah, + Qh=Qh, + ) + + make_transition_matrices!(cache, prior, h) + @test AH_22_IBM ≈ cache.Ah + + for Γ in (σ^2, σ^2 * Eye(d), σ^2 * I(d)) + @test QH_22_IBM ≈ Matrix(PNDE.apply_diffusion(cache.Qh, Γ)) + end + if FAC != PNDE.IsometricKroneckerCovariance + @test QH_22_IBM ≈ Matrix(PNDE.apply_diffusion(cache.Qh, σ^2 * I(d))) + end + end end end diff --git a/test/correctness.jl b/test/correctness.jl index df76f16de..d43ab1175 100644 --- a/test/correctness.jl +++ b/test/correctness.jl @@ -30,6 +30,10 @@ CONSTANT_ALGS = ( EK1(order=3, smooth=false, diffusionmodel=FixedDiffusion()) => 1e-8, EK1(order=3, smooth=false, initialization=ClassicSolverInit()) => 1e-7, EK1(order=3, smooth=false, initialization=SimpleInit()) => 1e-5, + DiagonalEK1(order=3, smooth=false) => 1e-7, + DiagonalEK1(order=3, smooth=false, diffusionmodel=FixedDiffusion()) => 1e-7, + DiagonalEK1(order=3, smooth=false, diffusionmodel=DynamicDiffusion()) => 1e-7, + DiagonalEK1(order=3, smooth=false, initialization=ClassicSolverInit()) => 1e-7, # smoothing EK0(order=3, smooth=true) => 1e-8, EK0(order=3, smooth=true, diffusionmodel=FixedDiffusion()) => 2e-8, @@ -37,6 +41,10 @@ CONSTANT_ALGS = ( EK0(order=3, smooth=true, diffusionmodel=DynamicMVDiffusion()) => 1e-8, EK1(order=3, smooth=true) => 1e-8, EK1(order=3, smooth=true, diffusionmodel=FixedDiffusion()) => 1e-8, + DiagonalEK1(order=3, smooth=true) => 1e-7, + DiagonalEK1(order=3, smooth=true, diffusionmodel=FixedDiffusion()) => 1e-7, + DiagonalEK1(order=3, smooth=true, diffusionmodel=DynamicDiffusion()) => 1e-7, + DiagonalEK1(order=3, smooth=true, initialization=ClassicSolverInit()) => 1e-7, # Priors EK0(prior=IOUP(3, -1), smooth=true) => 2e-9, EK1(prior=IOUP(3, -1), smooth=true, diffusionmodel=FixedDiffusion()) => 1e-9, @@ -53,12 +61,17 @@ ADAPTIVE_ALGS = ( EK0(order=3, initialization=ClassicSolverInit()) => 5e-5, EK0(order=3, initialization=SimpleInit()) => 1e-4, EK0(order=3, diffusionmodel=DynamicMVDiffusion(), initialization=ClassicSolverInit()) => 4e-5, + EK0(order=3, diffusionmodel=FixedMVDiffusion()) => 1e-4, EK1(order=2) => 2e-5, EK1(order=3) => 1e-5, EK1(order=5) => 1e-6, EK1(order=8) => 5e-6, EK1(order=3, initialization=ClassicSolverInit()) => 1e-5, EK1(order=3, initialization=SimpleInit()) => 1e-4, + DiagonalEK1(order=3) => 1e-4, + DiagonalEK1(order=3, diffusionmodel=FixedDiffusion()) => 1e-4, + DiagonalEK1(order=3, diffusionmodel=DynamicDiffusion()) => 1e-4, + DiagonalEK1(order=3, initialization=ClassicSolverInit()) => 1e-4, # Priors EK0(prior=IOUP(3, -1), smooth=true) => 1e-5, EK1(prior=IOUP(3, -1), smooth=true, diffusionmodel=FixedDiffusion()) => 1e-5, diff --git a/test/data_likelihoods.jl b/test/data_likelihoods.jl index 26a6b2596..9fe370616 100644 --- a/test/data_likelihoods.jl +++ b/test/data_likelihoods.jl @@ -33,25 +33,29 @@ kwargs = ( ) @testset "Compare data likelihoods" begin @testset "$alg" for alg in ( + # EK0 + EK0(), + EK0(diffusionmodel=FixedDiffusion()), + EK0(diffusionmodel=FixedMVDiffusion(rand(2), false)), + EK0(diffusionmodel=DynamicMVDiffusion()), + EK0(prior=IOUP(3, -1)), + EK0(prior=Matern(3, 1.5)), + # EK1 EK1(), EK1(diffusionmodel=FixedDiffusion()), EK1(diffusionmodel=FixedMVDiffusion(rand(2), false)), EK1(prior=IOUP(3, -1)), EK1(prior=Matern(3, 1.5)), EK1(prior=IOUP(3, update_rate_parameter=true)), + # DiagonalEK1 + DiagonalEK1(), + DiagonalEK1(diffusionmodel=FixedDiffusion()), + DiagonalEK1(diffusionmodel=FixedMVDiffusion(rand(2), false)), ) compare_data_likelihoods(alg; kwargs...) end end -@testset "EK0 is not (yet) supported" begin - for ll in (PNDE.dalton_data_loglik, PNDE.filtering_data_loglik) - @test_broken ll(prob, EK0(smooth=false); kwargs...) - end - @test_broken PNDE.fenrir_data_loglik( - prob, EK0(smooth=true); kwargs...) -end - @testset "Partial observations" begin H = [1 0;] data_part = (t=times, u=[H * d for d in obss]) @@ -63,6 +67,14 @@ end adaptive=false, dt=DT, dense=false, ) + @test_broken compare_data_likelihoods( + DiagonalEK1(); + observation_matrix=H, + observation_noise_cov=σ^2, + data=data_part, + adaptive=false, dt=DT, + dense=false, + ) end @testset "Observation noise types: $(typeof(Σ))" for Σ in ( @@ -70,15 +82,27 @@ end σ^2 * I, σ^2 * I(2), σ^2 * Eye(2), + Diagonal([σ^2 0; 0 2σ^2]), [σ^2 0; 0 2σ^2], (A = randn(2, 2); A'A), (PSDMatrix(randn(2, 2))), ) - compare_data_likelihoods( - EK1(); - observation_noise_cov=Σ, - data=data, - adaptive=false, dt=DT, - dense=false, - ) + @testset "$alg" for alg in (EK0(), DiagonalEK1(), EK1()) + if alg isa EK0 && !( + Σ isa Number || Σ isa UniformScaling || + Σ isa Diagonal{<:Number,<:FillArrays.Fill} + ) + continue + end + if alg isa DiagonalEK1 && !(Σ isa Number || Σ isa UniformScaling || Σ isa Diagonal) + continue + end + compare_data_likelihoods( + alg; + observation_noise_cov=Σ, + data=data, + adaptive=false, dt=DT, + dense=false, + ) + end end diff --git a/test/errors_thrown.jl b/test/errors_thrown.jl index 88c323259..562f2074f 100644 --- a/test/errors_thrown.jl +++ b/test/errors_thrown.jl @@ -31,3 +31,30 @@ end prior = IOUP(num_derivatives=1, rate_parameter=3, update_rate_parameter=true) @test_throws ArgumentError solve(prob, EK0(; prior)) end + +@testset "Invalid prior" begin + prob = prob_ode_lotkavolterra + @test_throws DimensionMismatch solve(prob, EK0(prior=IWP(dim=3, num_derivatives=2))) + prior = IOUP(num_derivatives=1, rate_parameter=3, update_rate_parameter=true) + @test_throws ArgumentError solve(prob, EK0(; prior)) +end + +@testset "Invalid solver configurations" begin + prob = prob_ode_lotkavolterra + + # Global calibration + observation noise doesn't work + @test_throws ArgumentError solve( + prob, EK0(pn_observation_noise=1, diffusionmodel=FixedDiffusion())) + @test_throws ArgumentError solve( + prob, EK0(pn_observation_noise=1, diffusionmodel=FixedMVDiffusion())) + + # EK1 + Multivariate diffusion doesn't work: + @test_throws ArgumentError solve( + prob, EK1(diffusionmodel=FixedMVDiffusion())) + @test_throws ArgumentError solve( + prob, EK1(diffusionmodel=DynamicMVDiffusion())) + + # Multivariate diffusion with non-diagonal diffusion model + @test_throws ArgumentError solve( + prob, EK0(diffusionmodel=FixedMVDiffusion(initial_diffusion=rand(2, 2)))) +end diff --git a/test/mass_matrix.jl b/test/mass_matrix.jl index b24ebe170..76786da85 100644 --- a/test/mass_matrix.jl +++ b/test/mass_matrix.jl @@ -34,11 +34,22 @@ using Test adaptive=false, dt=1e-2, ) + diagonalek1() = solve( + prob, + DiagonalEK1(smooth=false), + save_everystep=false, + dense=false, + adaptive=false, + dt=1e-2, + ) s1 = ek1() s0 = ek0() + s1diag = diagonalek1() ref = solve(prob, RadauIIA5(), abstol=1e-9, reltol=1e-6) @test s0.u[end] ≈ ref.u[end] rtol = 1e-7 + @test s1.u[end] ≈ ref.u[end] rtol = 1e-7 + @test s1diag.u[end] ≈ ref.u[end] rtol = 1e-7 @test s1.pu.Σ[1] isa PSDMatrix{<:Number,<:Matrix} @test s0.pu.Σ[1] isa PSDMatrix{<:Number,<:ProbNumDiffEq.IsometricKroneckerProduct} @@ -63,6 +74,7 @@ end 0 1 0 0 0 0 ] + M = Diagonal([1, 1, 0]) f = ODEFunction(rober, mass_matrix=M) prob = ODEProblem(f, [1.0, 0.0, 0.0], (0.0, 1e-2), (0.04, 3e7, 1e4)) @@ -78,4 +90,9 @@ end sol = solve(prob, EK1(order=3, initialization=SimpleInit())) @test sol.u[end] ≈ ref.u[end] rtol = 1e-8 + + sol = solve(prob, DiagonalEK1(order=3)) + @test sol.u[end] ≈ ref.u[end] rtol = 1e-8 + + @test_throws ArgumentError solve(prob, EK0()) end diff --git a/test/observation_noise.jl b/test/observation_noise.jl new file mode 100644 index 000000000..977fc8ac1 --- /dev/null +++ b/test/observation_noise.jl @@ -0,0 +1,28 @@ +using Test +using ProbNumDiffEq +using LinearAlgebra, FillArrays +import ODEProblemLibrary: prob_ode_lotkavolterra + +prob = prob_ode_lotkavolterra +d = length(prob.u0) +@testset "typeof(R)=$(typeof(R))" for (i, R) in enumerate(( + 0.1, + 0.1I, + 0.1Eye(d), + 0.1I(d), + Diagonal([0.1, 0.2]), + [0.1 0.01; 0.01 0.1], + PSDMatrix(0.1 * rand(d, d)), +)) + if i <= 3 + @test_nowarn solve(prob, EK0(pn_observation_noise=R)) + else + @test_broken solve(prob, EK0(pn_observation_noise=R)) + end + if i <= 5 + @test_nowarn solve(prob, DiagonalEK1(pn_observation_noise=R)) + else + @test_broken solve(prob, DiagonalEK1(pn_observation_noise=R)) + end + @test_nowarn solve(prob, EK1(pn_observation_noise=R)) +end diff --git a/test/runtests.jl b/test/runtests.jl index 966119b38..b39c91e9a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -19,12 +19,21 @@ const GROUP = get(ENV, "GROUP", "All") @testset "ProbNumDiffEq" begin if GROUP == "All" || GROUP == "Core" @timedtestset "Core" begin + @timedsafetestset "BlockDiagonals" begin + include("core/blockdiagonals.jl") + end + @timedsafetestset "FastLinalg (`_matmul!`)" begin + include("core/fast_linalg.jl") + end @timedsafetestset "Filtering" begin include("core/filtering.jl") end @timedsafetestset "Priors" begin include("core/priors.jl") end + @timedsafetestset "Diffusions" begin + include("core/diffusions.jl") + end @timedsafetestset "Preconditioning" begin include("core/preconditioning.jl") end @@ -120,6 +129,9 @@ const GROUP = get(ENV, "GROUP", "All") @timedsafetestset "Data Likelihoods" begin include("data_likelihoods.jl") end + @timedsafetestset "Observation noise" begin + include("observation_noise.jl") + end end end diff --git a/test/secondorderode.jl b/test/secondorderode.jl index b849af055..468a174b5 100644 --- a/test/secondorderode.jl +++ b/test/secondorderode.jl @@ -4,13 +4,13 @@ using LinearAlgebra using Test function twobody(du, u, p, t) - R3 = norm(u[1:2])^3 - @. du[3:4] = -u[1:2] / R3 - @. du[1:2] = u[3:4] + R3 = norm(u[3:4])^3 + @. du[3:4] = u[1:2] + @. du[1:2] = -u[3:4] / R3 end u0, du0 = [0.4, 0.0], [0.0, 2.0] tspan = (0, 0.1) -prob_base = ODEProblem(twobody, [u0...; du0...], tspan) +prob_base = ODEProblem(twobody, [du0...; u0...], tspan) function twobody2_iip(ddu, du, u, p, t) R3 = norm(u)^3 @@ -24,7 +24,7 @@ function twobody2_oop(du, u, p, t) end prob_oop = SecondOrderODEProblem(twobody2_oop, du0, u0, tspan) -appxsol = solve(prob_iip, Vern9(), abstol=1e-10, reltol=1e-10) +appxsol = solve(prob_base, Vern9(), abstol=1e-9, reltol=1e-6) @testset "$S" for (S, _prob) in (("IIP", prob_iip), ("OOP", prob_oop)) @testset "$alg" for alg in ( @@ -34,8 +34,8 @@ appxsol = solve(prob_iip, Vern9(), abstol=1e-10, reltol=1e-10) EK1(initialization=ForwardDiffInit(2)), # EK1(initialization=ClassicSolverInit()), # unstable for this problem EK1(diffusionmodel=FixedDiffusion()), - EK0(diffusionmodel=FixedMVDiffusion()), - EK0(diffusionmodel=DynamicMVDiffusion()), + # EK0(diffusionmodel=FixedMVDiffusion()), + # EK0(diffusionmodel=DynamicMVDiffusion()), ) sol = solve(_prob, alg) diff --git a/test/state_init.jl b/test/state_init.jl index 4330e5804..bf30c0082 100644 --- a/test/state_init.jl +++ b/test/state_init.jl @@ -64,7 +64,7 @@ import ODEProblemLibrary: prob_ode_fitzhughnagumo, prob_ode_pleiades prob, EK0(order=2, initialization=ClassicSolverInit(init_on_ddu=false)), ) - @test_broken init( + @test_nowarn init( prob, EK0(order=2, initialization=ClassicSolverInit(init_on_ddu=true)), ) diff --git a/test/stiff_problem.jl b/test/stiff_problem.jl index de8a6009f..e94a3ad54 100644 --- a/test/stiff_problem.jl +++ b/test/stiff_problem.jl @@ -8,3 +8,7 @@ appxsol = solve(prob, RadauIIA5()) sol = solve(prob, EK1(order=3)) @test appxsol.u[end] ≈ sol.u[end] rtol = 5e-3 @test appxsol(0.5) ≈ sol(0.5).μ rtol = 5e-3 + +sol = solve(prob, DiagonalEK1(order=3)) +@test appxsol.u[end] ≈ sol.u[end] rtol = 5e-3 +@test appxsol(0.5) ≈ sol(0.5).μ rtol = 5e-3