diff --git a/Project.toml b/Project.toml index f89912d..1d517f7 100644 --- a/Project.toml +++ b/Project.toml @@ -1,10 +1,11 @@ name = "AdvancedMH" uuid = "5b7e9947-ddc0-4b3f-9b55-0d8042f74170" -version = "0.8.4" +version = "0.8.5" [deps] AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" @@ -26,6 +27,7 @@ AdvancedMHStructArraysExt = "StructArrays" AbstractMCMC = "5.6" DiffResults = "1" Distributions = "0.25" +DocStringExtensions = "0.9" FillArrays = "1" ForwardDiff = "0.10" LinearAlgebra = "1.6" diff --git a/docs/Project.toml b/docs/Project.toml index 1814eb3..2ec693c 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,5 +1,15 @@ [deps] +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" +MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [compat] Documenter = "1" +Distributions = "0.25" +LinearAlgebra = "1.6" +LogDensityProblems = "2" +MCMCChains = "6.0.4" +Random = "1.6" diff --git a/docs/src/api.md b/docs/src/api.md index c24b0c9..9924859 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -12,3 +12,9 @@ MetropolisHastings ```@docs DensityModel ``` + +## Samplers + +```@docs +RobustAdaptiveMetropolis +``` diff --git a/src/AdvancedMH.jl b/src/AdvancedMH.jl index 7ff9759..a3d2ece 100644 --- a/src/AdvancedMH.jl +++ b/src/AdvancedMH.jl @@ -3,8 +3,9 @@ module AdvancedMH # Import the relevant libraries. using AbstractMCMC using Distributions -using LinearAlgebra: I +using LinearAlgebra: LinearAlgebra, I using FillArrays: Zeros +using DocStringExtensions: FIELDS using LogDensityProblems: LogDensityProblems @@ -22,7 +23,8 @@ export SymmetricRandomWalkProposal, Ensemble, StretchProposal, - MALA + MALA, + RobustAdaptiveMetropolis # Reexports export sample, MCMCThreads, MCMCDistributed, MCMCSerial @@ -159,5 +161,6 @@ include("proposal.jl") include("mh-core.jl") include("emcee.jl") include("MALA.jl") +include("RobustAdaptiveMetropolis.jl") end # module AdvancedMH diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl new file mode 100644 index 0000000..e0284f3 --- /dev/null +++ b/src/RobustAdaptiveMetropolis.jl @@ -0,0 +1,278 @@ +# TODO: Should we generalise this arbitrary symmetric proposals? +""" + RobustAdaptiveMetropolis + +Robust Adaptive Metropolis-Hastings (RAM). + +This is a simple implementation of the RAM algorithm described in [^VIH12]. + +# Fields + +$(FIELDS) + +# Examples + +The following demonstrates how to implement a simple Gaussian model and sample from it using the RAM algorithm. + +```jldoctest ram-gaussian; setup=:(using Random; Random.seed!(1234);) +julia> using AdvancedMH, Distributions, MCMCChains, LogDensityProblems, LinearAlgebra + +julia> # Define a Gaussian with zero mean and some covariance. + struct Gaussian{A} + Σ::A + end + +julia> # Implement the LogDensityProblems interface. + LogDensityProblems.dimension(model::Gaussian) = size(model.Σ, 1) + +julia> function LogDensityProblems.logdensity(model::Gaussian, x) + d = LogDensityProblems.dimension(model) + return logpdf(MvNormal(zeros(d),model.Σ), x) + end + +julia> LogDensityProblems.capabilities(::Gaussian) = LogDensityProblems.LogDensityOrder{0}() + +julia> # Construct the model. We'll use a correlation of 0.5. + model = Gaussian([1.0 0.5; 0.5 1.0]); + +julia> # Number of samples we want in the resulting chain. + num_samples = 10_000; + +julia> # Number of warmup steps, i.e. the number of steps to adapt the covariance of the proposal. + # Note that these are not included in the resulting chain, as `discard_initial=num_warmup` + # by default in the `sample` call. To include them, pass `discard_initial=0` to `sample`. + num_warmup = 10_000; + +julia> # Sample! + chain = sample( + model, + RobustAdaptiveMetropolis(), + num_samples; + chain_type=Chains, num_warmup, progress=false, initial_params=zeros(2) + ); + +julia> isapprox(cov(Array(chain)), model.Σ; rtol = 0.2) +true +``` + +It's also possible to restrict the eigenvalues to avoid either too small or too large values. See p. 13 in [^VIH12]. + +```jldoctest ram-gaussian +julia> chain = sample( + model, + RobustAdaptiveMetropolis(eigenvalue_lower_bound=0.1, eigenvalue_upper_bound=2.0), + num_samples; + chain_type=Chains, num_warmup, progress=false, initial_params=zeros(2) + ); + +julia> norm(cov(Array(chain)) - [1.0 0.5; 0.5 1.0]) < 0.2 +true +``` + +# References +[^VIH12]: Vihola (2012) Robust adaptive Metropolis algorithm with coerced acceptance rate, Statistics and computing. +""" +Base.@kwdef struct RobustAdaptiveMetropolis{T,A<:Union{Nothing,AbstractMatrix{T}}} <: + AdvancedMH.MHSampler + "target acceptance rate. Default: 0.234." + α::T = 0.234 + "negative exponent of the adaptation decay rate. Default: `0.6`." + γ::T = 0.6 + "initial lower-triangular Cholesky factor of the covariance matrix. If specified, should be convertible into a `LowerTriangular`. Default: `nothing`, which is interpreted as the identity matrix." + S::A = nothing + "lower bound on eigenvalues of the adapted Cholesky factor. Default: `0.0`." + eigenvalue_lower_bound::T = 0.0 + "upper bound on eigenvalues of the adapted Cholesky factor. Default: `Inf`." + eigenvalue_upper_bound::T = Inf +end + +""" + RobustAdaptiveMetropolisState + +State of the Robust Adaptive Metropolis-Hastings (RAM) algorithm. + +See also: [`RobustAdaptiveMetropolis`](@ref). + +# Fields +$(FIELDS) +""" +struct RobustAdaptiveMetropolisState{T1,L,A,T2,T3} + "current realization of the chain." + x::T1 + "log density of `x` under the target model." + logprob::L + "current lower-triangular Cholesky factor." + S::A + "log acceptance ratio of the previous iteration (not necessarily of `x`)." + logα::T2 + "current step size for adaptation of `S`." + η::T3 + "current iteration." + iteration::Int + "whether the previous iteration was accepted." + isaccept::Bool +end + +AbstractMCMC.getparams(state::RobustAdaptiveMetropolisState) = state.x +function AbstractMCMC.setparams!!(state::RobustAdaptiveMetropolisState, x) + return RobustAdaptiveMetropolisState( + x, state.logprob, state.S, state.logα, state.η, state.iteration, state.isaccept + ) +end + +function ram_step_inner( + rng::Random.AbstractRNG, + model::AbstractMCMC.LogDensityModel, + sampler::RobustAdaptiveMetropolis, + state::RobustAdaptiveMetropolisState, +) + # This is the initial state. + f = model.logdensity + d = LogDensityProblems.dimension(f) + + # Sample the proposal. + x = state.x + U = randn(rng, eltype(x), d) + x_new = muladd(state.S, U, x) + + # Compute the acceptance probability. + lp = state.logprob + lp_new = LogDensityProblems.logdensity(f, x_new) + # Technically, the `min` here is unnecessary for sampling according to `min(..., 1)`. + # However, `ram_adapt` assumes that `logα` actually represents the log acceptance probability + # and is thus bounded at 0. Moreover, users might be interested in inspecting the average + # acceptance rate to check that the algorithm achieves something similar to the target rate. + # Hence, it's a bit more convenient for the user if we just perform the `min` here + # so they can just take an average of (`exp` of) the `logα` values. + logα = min(lp_new - lp, zero(lp)) + isaccept = Random.randexp(rng) > -logα + + return x_new, lp_new, U, logα, isaccept +end + +function ram_adapt( + sampler::RobustAdaptiveMetropolis, + state::RobustAdaptiveMetropolisState, + logα::Real, + U::AbstractVector, +) + Δα = exp(logα) - sampler.α + S = state.S + # TODO: Make this configurable by defining a more general path. + η = state.iteration^(-sampler.γ) + ΔS = (η * abs(Δα)) * S * U / LinearAlgebra.norm(U) + # TODO: Maybe do in-place and then have the user extract it with a callback if they really want it. + S_new = if sign(Δα) == 1 + # One rank update. + LinearAlgebra.lowrankupdate(LinearAlgebra.Cholesky(S.data, :L, 0), ΔS).L + else + # One rank downdate. + LinearAlgebra.lowrankdowndate(LinearAlgebra.Cholesky(S.data, :L, 0), ΔS).L + end + return S_new, η +end + +function AbstractMCMC.step( + rng::Random.AbstractRNG, + model::AbstractMCMC.LogDensityModel, + sampler::RobustAdaptiveMetropolis; + initial_params=nothing, + kwargs..., +) + # This is the initial state. + f = model.logdensity + d = LogDensityProblems.dimension(f) + + # Initial parameter state. + T = if initial_params === nothing + eltype(sampler.γ) + else + Base.promote_type(eltype(sampler.γ), eltype(initial_params)) + end + x = if initial_params === nothing + randn(rng, T, d) + else + convert(AbstractVector{T}, initial_params) + end + # Initialize the Cholesky factor of the covariance matrix. + S_data = if sampler.S === nothing + LinearAlgebra.diagm(0 => ones(T, d)) + else + # Check the dimensionality of the provided `S`. + if size(sampler.S) != (d, d) + throw(ArgumentError("The provided `S` has the wrong dimensionality.")) + end + convert(AbstractMatrix{T}, sampler.S) + end + S = LinearAlgebra.LowerTriangular(S_data) + + # Construct the initial state. + lp = LogDensityProblems.logdensity(f, x) + state = RobustAdaptiveMetropolisState(x, lp, S, zero(T), 0, 1, true) + + return AdvancedMH.Transition(x, lp, true), state +end + +function AbstractMCMC.step( + rng::Random.AbstractRNG, + model::AbstractMCMC.LogDensityModel, + sampler::RobustAdaptiveMetropolis, + state::RobustAdaptiveMetropolisState; + kwargs..., +) + # Take the inner step. + x_new, lp_new, U, logα, isaccept = ram_step_inner(rng, model, sampler, state) + # Accept / reject the proposal. + state_new = RobustAdaptiveMetropolisState( + isaccept ? x_new : state.x, + isaccept ? lp_new : state.logprob, + state.S, + logα, + state.η, + state.iteration + 1, + isaccept, + ) + return AdvancedMH.Transition(state_new.x, state_new.logprob, state_new.isaccept), + state_new +end + +function valid_eigenvalues(S, lower_bound, upper_bound) + # Short-circuit if the bounds are the default. + (lower_bound == 0 && upper_bound == Inf) && return true + # Note that this is just the diagonal when `S` is triangular. + eigenvals = LinearAlgebra.eigvals(S) + return all(x -> lower_bound <= x <= upper_bound, eigenvals) +end + +function AbstractMCMC.step_warmup( + rng::Random.AbstractRNG, + model::AbstractMCMC.LogDensityModel, + sampler::RobustAdaptiveMetropolis, + state::RobustAdaptiveMetropolisState; + kwargs..., +) + # Take the inner step. + x_new, lp_new, U, logα, isaccept = ram_step_inner(rng, model, sampler, state) + # Adapt the proposal. + S_new, η = ram_adapt(sampler, state, logα, U) + # Check that `S_new` has eigenvalues in the desired range. + if !valid_eigenvalues( + S_new, sampler.eigenvalue_lower_bound, sampler.eigenvalue_upper_bound + ) + # In this case, we just keep the old `S` (p. 13 in Vihola, 2012). + S_new = state.S + end + + # Update state. + state_new = RobustAdaptiveMetropolisState( + isaccept ? x_new : state.x, + isaccept ? lp_new : state.logprob, + S_new, + logα, + η, + state.iteration + 1, + isaccept, + ) + return AdvancedMH.Transition(state_new.x, state_new.logprob, state_new.isaccept), + state_new +end diff --git a/test/RobustAdaptiveMetropolis.jl b/test/RobustAdaptiveMetropolis.jl new file mode 100644 index 0000000..fe82f14 --- /dev/null +++ b/test/RobustAdaptiveMetropolis.jl @@ -0,0 +1,72 @@ +struct Gaussian{A} + Σ::A +end +LogDensityProblems.dimension(model::Gaussian) = size(model.Σ, 1) +LogDensityProblems.capabilities(::Gaussian) = LogDensityProblems.LogDensityOrder{0}() +function LogDensityProblems.logdensity(model::Gaussian, x) + d = LogDensityProblems.dimension(model) + return logpdf(MvNormal(zeros(d), model.Σ), x) +end + +Base.@kwdef struct StatesExtractor{A} + states::A = Vector{Any}() +end +function (callback::StatesExtractor)( + rng, + model, + sampler, + sample, + state, + iteration; + kwargs..., +) + if iteration == 1 + empty!(callback.states) + end + + push!(callback.states, state) +end + +@testset "RobustAdaptiveMetropolis" begin + # Testing of sampling is done in the docstring. Here we test explicit properties of the sampler. + @testset "eigenvalue bounds" begin + for (σ², lower_bound, upper_bound) in [ + (10.0, 0.5, 1.1), # should hit upper bound easily + (0.01, 0.5, 1.1), # should hit lower bound easily + ] + ρ = σ² / 2 + model = Gaussian([σ² ρ; ρ σ²]) + callback = StatesExtractor() + + # Use aggressive adaptation. + sampler = + RobustAdaptiveMetropolis(γ = 0.51, eigenvalue_lower_bound = 0.9, eigenvalue_upper_bound = 1.1) + num_warmup = 1000 + discard_initial = 0 # we're only keeping the warmup samples + chain = sample( + model, + sampler, + num_warmup; + chain_type = Chains, + num_warmup, + discard_initial, + progress = false, + initial_params = zeros(2), + callback = callback, + ) + S_samples = getproperty.(callback.states, :S) + eigval_min = map(minimum, eachrow(mapreduce(eigvals, hcat, S_samples))) + eigval_max = map(maximum, eachrow(mapreduce(eigvals, hcat, S_samples))) + @test all(>=(sampler.eigenvalue_lower_bound), eigval_min) + @test all(<=(sampler.eigenvalue_upper_bound), eigval_max) + + if σ² < lower_bound + # We should hit the lower bound. + @test all(≈(sampler.eigenvalue_lower_bound, atol = 0.05), eigval_min) + else + # We should hit the upper bound. + @test all(≈(sampler.eigenvalue_upper_bound, atol = 0.05), eigval_max) + end + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index fd06296..76a1631 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -366,4 +366,6 @@ include("util.jl") end @testset "EMCEE" begin include("emcee.jl") end + + include("RobustAdaptiveMetropolis.jl") end