From 8ff3fe2e1c2d8dcd4740719b575785ffd2a85e7e Mon Sep 17 00:00:00 2001 From: John Pearson Date: Thu, 21 Jan 2016 17:52:40 -0500 Subject: [PATCH] added params to multivariate and matrix vars - Also mixtures and truncated distributions - Tests enforce contract typeof(d)(params(d)...) == d for isa(d, Distribution) - Similar contracts hold for mixtures and truncated distributions --- src/matrix/inversewishart.jl | 4 +-- src/matrix/wishart.jl | 4 +-- src/mixtures/mixturemodel.jl | 1 + src/mixtures/unigmm.jl | 2 ++ src/multivariate/dirichlet.jl | 1 + src/multivariate/mvnormal.jl | 1 + src/multivariate/mvnormalcanon.jl | 1 + src/multivariate/mvtdist.jl | 10 ++++--- src/multivariate/vonmisesfisher.jl | 10 +++---- src/testutils.jl | 21 +++++++++++++++ .../discrete/noncentralhypergeometric.jl | 2 ++ test/dirichlet.jl | 1 + test/matrix.jl | 1 + test/mixture.jl | 15 +++++++++++ test/multinomial.jl | 2 +- test/mvnormal.jl | 27 +++++++++---------- test/mvtdist.jl | 1 + test/noncentralhypergeometric.jl | 4 ++- test/vonmisesfisher.jl | 2 +- 19 files changed, 79 insertions(+), 31 deletions(-) diff --git a/src/matrix/inversewishart.jl b/src/matrix/inversewishart.jl index 4e9cee2ec..655e07e85 100644 --- a/src/matrix/inversewishart.jl +++ b/src/matrix/inversewishart.jl @@ -11,7 +11,7 @@ end #### Constructors -function InverseWishart{ST<:AbstractPDMat}(df::Real, Ψ::ST) +function InverseWishart{ST <: AbstractPDMat}(df::Real, Ψ::ST) p = dim(Ψ) df > p - 1 || error("df should be greater than dim - 1.") InverseWishart{ST}(df, Ψ, _invwishart_c0(df, Ψ)) @@ -35,7 +35,7 @@ insupport(d::InverseWishart, X::Matrix{Float64}) = size(X) == size(d) && isposde dim(d::InverseWishart) = dim(d.Ψ) size(d::InverseWishart) = (p = dim(d); (p, p)) - +params(d::InverseWishart) = (d.df, d.Ψ, d.c0) #### Show diff --git a/src/matrix/wishart.jl b/src/matrix/wishart.jl index 934e903b7..16748df2e 100644 --- a/src/matrix/wishart.jl +++ b/src/matrix/wishart.jl @@ -11,7 +11,7 @@ end #### Constructors -function Wishart{ST<:AbstractPDMat}(df::Real, S::ST) +function Wishart{ST <: AbstractPDMat}(df::Real, S::ST) p = dim(S) df > p - 1 || error("df should be greater than dim - 1.") Wishart{ST}(df, S, _wishart_c0(df, S)) @@ -35,7 +35,7 @@ insupport(d::Wishart, X::Matrix{Float64}) = size(X) == size(d) && isposdef(X) dim(d::Wishart) = dim(d.S) size(d::Wishart) = (p = dim(d); (p, p)) - +params(d::Wishart) = (d.df, d.S, d.c0) #### Show diff --git a/src/mixtures/mixturemodel.jl b/src/mixtures/mixturemodel.jl index e46b261da..abb8d8ab4 100644 --- a/src/mixtures/mixturemodel.jl +++ b/src/mixtures/mixturemodel.jl @@ -69,6 +69,7 @@ components(d::MixtureModel) = d.components component(d::MixtureModel, k::Int) = d.components[k] probs(d::MixtureModel) = probs(d.prior) +params(d::MixtureModel) = ([params(c) for c in d.components], params(d.prior)[1]) function mean(d::UnivariateMixture) K = ncomponents(d) diff --git a/src/mixtures/unigmm.jl b/src/mixtures/unigmm.jl index 307aa2be1..ce2aec2ae 100644 --- a/src/mixtures/unigmm.jl +++ b/src/mixtures/unigmm.jl @@ -25,6 +25,8 @@ mean(d::UnivariateGMM) = dot(d.means, probs(d)) rand(d::UnivariateGMM) = (k = rand(d.prior); d.means[k] + randn() * d.stds[k]) +params(d::UnivariateGMM) = (d.means, d.stds, d.prior) + immutable UnivariateGMMSampler <: Sampleable{Univariate,Continuous} means::Vector{Float64} stds::Vector{Float64} diff --git a/src/multivariate/dirichlet.jl b/src/multivariate/dirichlet.jl index cdb50fc07..6dda412d0 100644 --- a/src/multivariate/dirichlet.jl +++ b/src/multivariate/dirichlet.jl @@ -38,6 +38,7 @@ Base.show(io::IO, d::Dirichlet) = show(io, d, (:alpha,)) length(d::Dirichlet) = length(d.alpha) mean(d::Dirichlet) = d.alpha .* inv(d.alpha0) +params(d::Dirichlet) = (d.alpha,) function var(d::Dirichlet) α = d.alpha diff --git a/src/multivariate/mvnormal.jl b/src/multivariate/mvnormal.jl index a8da08525..bd6f97687 100644 --- a/src/multivariate/mvnormal.jl +++ b/src/multivariate/mvnormal.jl @@ -109,6 +109,7 @@ Base.show(io::IO, d::MvNormal) = length(d::MvNormal) = length(d.μ) mean(d::MvNormal) = convert(Vector{Float64}, d.μ) +params(d::MvNormal) = (d.μ, d.Σ) var(d::MvNormal) = diag(d.Σ) cov(d::MvNormal) = full(d.Σ) diff --git a/src/multivariate/mvnormalcanon.jl b/src/multivariate/mvnormalcanon.jl index 17b8179c9..a8ff02e9b 100644 --- a/src/multivariate/mvnormalcanon.jl +++ b/src/multivariate/mvnormalcanon.jl @@ -65,6 +65,7 @@ canonform{C}(d::MvNormal{C,ZeroVector{Float64}}) = MvNormalCanon(inv(d.Σ)) length(d::MvNormalCanon) = length(d.μ) mean(d::MvNormalCanon) = convert(Vector{Float64}, d.μ) +params(d::MvNormalCanon) = (d.μ, d.h, d.J) var(d::MvNormalCanon) = diag(inv(d.J)) cov(d::MvNormalCanon) = full(inv(d.J)) diff --git a/src/multivariate/mvtdist.jl b/src/multivariate/mvtdist.jl index 58d6e141c..ec1823f4f 100644 --- a/src/multivariate/mvtdist.jl +++ b/src/multivariate/mvtdist.jl @@ -31,7 +31,7 @@ end function GenericMvTDist{Cov<:AbstractPDMat}(df::Float64, Σ::Cov) d = dim(Σ) - GenericMvTDist{Cov}(df, d, true, zeros(d), Σ) + GenericMvTDist{Cov}(df, d, true, zeros(d), Σ) end ## Construction of multivariate normal with specific covariance type @@ -80,6 +80,8 @@ invscale(d::GenericMvTDist) = full(inv(d.Σ)) invcov(d::GenericMvTDist) = d.df>2 ? ((d.df-2)/d.df)*full(inv(d.Σ)) : NaN*ones(d.dim, d.dim) logdet_cov(d::GenericMvTDist) = d.df>2 ? logdet((d.df/(d.df-2))*d.Σ) : NaN +params(d::GenericMvTDist) = (d.df, d.μ, d.Σ) + # For entropy calculations see "Multivariate t Distributions and their Applications", S. Kotz & S. Nadarajah function entropy(d::GenericMvTDist) hdf, hdim = 0.5*d.df, 0.5*d.dim @@ -89,12 +91,12 @@ end # evaluation (for GenericMvTDist) -insupport{T<:Real}(d::AbstractMvTDist, x::AbstractVector{T}) = +insupport{T<:Real}(d::AbstractMvTDist, x::AbstractVector{T}) = length(d) == length(x) && allfinite(x) -function sqmahal{T<:Real}(d::GenericMvTDist, x::DenseVector{T}) +function sqmahal{T<:Real}(d::GenericMvTDist, x::DenseVector{T}) z::Vector{Float64} = d.zeromean ? x : x - d.μ - invquad(d.Σ, z) + invquad(d.Σ, z) end function sqmahal!{T<:Real}(r::DenseArray, d::GenericMvTDist, x::DenseMatrix{T}) diff --git a/src/multivariate/vonmisesfisher.jl b/src/multivariate/vonmisesfisher.jl index 35ab5cb9c..07c2c4e66 100644 --- a/src/multivariate/vonmisesfisher.jl +++ b/src/multivariate/vonmisesfisher.jl @@ -42,7 +42,7 @@ meandir(d::VonMisesFisher) = d.μ concentration(d::VonMisesFisher) = d.κ insupport{T<:Real}(d::VonMisesFisher, x::DenseVector{T}) = isunitvec(x) - +params(d::VonMisesFisher) = (d.μ, d.κ) ### Evaluation @@ -51,13 +51,13 @@ function _vmflck(p, κ) q = hp - 1.0 q * log(κ) - hp * log(2π) - log(besseli(q, κ)) end -_vmflck3(κ) = log(κ) - log2π - κ - log1mexp(-2.0 * κ) +_vmflck3(κ) = log(κ) - log2π - κ - log1mexp(-2.0 * κ) vmflck(p, κ) = (p == 3 ? _vmflck3(κ) : _vmflck(p, κ))::Float64 _logpdf{T<:Real}(d::VonMisesFisher, x::DenseVector{T}) = d.logCκ + d.κ * dot(d.μ, x) -### Sampling +### Sampling sampler(d::VonMisesFisher) = VonMisesFisherSampler(d.μ, d.κ) @@ -106,6 +106,4 @@ function _vmf_estkappa(p::Int, ρ::Float64) return κ end -_vmfA(half_p::Float64, κ::Float64) = besseli(half_p, κ) / besseli(half_p - 1.0, κ) - - +_vmfA(half_p::Float64, κ::Float64) = besseli(half_p, κ) / besseli(half_p - 1.0, κ) diff --git a/src/testutils.jl b/src/testutils.jl index d394dae93..0caf7451a 100644 --- a/src/testutils.jl +++ b/src/testutils.jl @@ -35,6 +35,7 @@ function test_distr(distr::DiscreteUnivariateDistribution, n::Int) test_stats(distr, vs) test_samples(distr, n) + test_params(distr) end @@ -50,6 +51,7 @@ function test_distr(distr::ContinuousUnivariateDistribution, n::Int) xs = test_samples(distr, n) allow_test_stats(distr) && test_stats(distr, xs) + test_params(distr) end @@ -505,3 +507,22 @@ function test_stats(d::ContinuousUnivariateDistribution, xs::AbstractVector{Floa end end end + +function test_params(d::Distribution) + # simply test that params returns something sufficient to + # reconstruct d + D = typeof(d) + pars = params(d) + d_new = D(pars...) + @test d_new == d +end + +function test_params(d::Truncated) + # simply test that params returns something sufficient to + # reconstruct d + d_unt = d.untruncated + D = typeof(d_unt) + pars = params(d_unt) + d_new = Truncated(D(pars...), d.lower, d.upper) + @test d_new == d +end diff --git a/src/univariate/discrete/noncentralhypergeometric.jl b/src/univariate/discrete/noncentralhypergeometric.jl index bfee2f00e..e28945637 100644 --- a/src/univariate/discrete/noncentralhypergeometric.jl +++ b/src/univariate/discrete/noncentralhypergeometric.jl @@ -32,6 +32,8 @@ function quantile(d::NoncentralHypergeometric, q::Float64) end end +params(d::NoncentralHypergeometric) = (d.ns, d.nf, d.n, d.ω) + ## Fisher's noncentral hypergeometric distribution immutable FisherNoncentralHypergeometric <: NoncentralHypergeometric diff --git a/test/dirichlet.jl b/test/dirichlet.jl index 89e6fdbf1..bd5e85b55 100644 --- a/test/dirichlet.jl +++ b/test/dirichlet.jl @@ -35,6 +35,7 @@ d = Dirichlet(v) @test length(d) == length(v) @test d.alpha == v @test d.alpha0 == sum(v) +@test d == typeof(d)(params(d)...) @test_approx_eq mean(d) v / sum(v) @test_approx_eq cov(d) [8 -2 -6; -2 5 -3; -6 -3 9] / (36 * 7) diff --git a/test/matrix.jl b/test/matrix.jl index dd51c641c..91b333622 100644 --- a/test/matrix.jl +++ b/test/matrix.jl @@ -11,6 +11,7 @@ IW = InverseWishart(v,S) for d in [W,IW] @test size(d) == size(rand(d)) @test length(d) == length(rand(d)) + @test typeof(d)(params(d)...) == d end @test_approx_eq_eps mean(rand(W,100000)) mean(W) 0.1 diff --git a/test/mixture.jl b/test/mixture.jl index 198cfbeed..389463045 100644 --- a/test/mixture.jl +++ b/test/mixture.jl @@ -121,7 +121,19 @@ function test_mixture(g::MultivariateMixture, n::Int, ns::Int) @test_approx_eq_eps cov(Xs, vardim=2) cov(g) 0.01 end +function test_params(g::AbstractMixtureModel) + C = eltype(g.components) + pars = params(g) + mm = MixtureModel(C, pars...) + @test g.prior == mm.prior + @test g.components == mm.components +end +function test_params(g::UnivariateGMM) + pars = params(g) + mm = UnivariateGMM(pars...) + @test g == mm +end # Tests @@ -131,11 +143,13 @@ g_u = MixtureModel(Normal, [(0.0, 1.0), (2.0, 1.0), (-4.0, 1.5)], [0.2, 0.5, 0.3 @test isa(g_u, MixtureModel{Univariate, Continuous, Normal}) @test ncomponents(g_u) == 3 test_mixture(g_u, 1000, 10^6) +test_params(g_u) g_u = UnivariateGMM([0.0, 2.0, -4.0], [1.0, 1.2, 1.5], Categorical([0.2, 0.5, 0.3])) @test isa(g_u, UnivariateGMM) @test ncomponents(g_u) == 3 test_mixture(g_u, 1000, 10^6) +test_params(g_u) println(" testing MultivariateMixture") g_m = MixtureModel( @@ -147,3 +161,4 @@ g_m = MixtureModel( @test length(components(g_m)) == 3 @test length(g_m) == 2 test_mixture(g_m, 1000, 10^6) +test_params(g_m) diff --git a/test/multinomial.jl b/test/multinomial.jl index e9f282c80..61190108f 100644 --- a/test/multinomial.jl +++ b/test/multinomial.jl @@ -26,6 +26,7 @@ x = rand(d) @test insupport(d, x) @test size(x) == size(d) @test length(x) == length(d) +@test d == typeof(d)(params(d)...) x = rand(d, 100) @test isa(x, Matrix{Int}) @@ -86,4 +87,3 @@ r = fit_mle(Multinomial, x, fill(2.0, size(x,2))) @test r.n == nt @test length(r) == length(p) @test_approx_eq_eps probs(r) p 0.02 - diff --git a/test/mvnormal.jl b/test/mvnormal.jl index 6f602038a..46415db94 100644 --- a/test/mvnormal.jl +++ b/test/mvnormal.jl @@ -11,7 +11,7 @@ import Distributions: distrname function test_mvnormal(g::AbstractMvNormal, n_tsamples::Int=10^6) d = length(g) - μ = mean(g) + μ = mean(g) Σ = cov(g) @test isa(μ, Vector{Float64}) @test isa(Σ, Matrix{Float64}) @@ -22,6 +22,7 @@ function test_mvnormal(g::AbstractMvNormal, n_tsamples::Int=10^6) ldcov = logdetcov(g) @test_approx_eq ldcov logdet(Σ) vs = diag(Σ) + @test g == typeof(g)(params(g)...) # sampling X = rand(g, n_tsamples) @@ -61,18 +62,18 @@ h = [1., 2., 3.] dv = [1.2, 3.4, 2.6] J = [4. -2. -1.; -2. 5. -1.; -1. -1. 6.] -for (T, g, μ, Σ) in [ - (IsoNormal, MvNormal(mu, sqrt(2.0)), mu, 2.0 * eye(3)), - (ZeroMeanIsoNormal, MvNormal(3, sqrt(2.0)), zeros(3), 2.0 * eye(3)), - (DiagNormal, MvNormal(mu, sqrt(va)), mu, diagm(va)), - (ZeroMeanDiagNormal, MvNormal(sqrt(va)), zeros(3), diagm(va)), - (FullNormal, MvNormal(mu, C), mu, C), +for (T, g, μ, Σ) in [ + (IsoNormal, MvNormal(mu, sqrt(2.0)), mu, 2.0 * eye(3)), + (ZeroMeanIsoNormal, MvNormal(3, sqrt(2.0)), zeros(3), 2.0 * eye(3)), + (DiagNormal, MvNormal(mu, sqrt(va)), mu, diagm(va)), + (ZeroMeanDiagNormal, MvNormal(sqrt(va)), zeros(3), diagm(va)), + (FullNormal, MvNormal(mu, C), mu, C), (ZeroMeanFullNormal, MvNormal(C), zeros(3), C), (IsoNormalCanon, MvNormalCanon(h, 2.0), h / 2.0, 0.5 * eye(3)), (ZeroMeanIsoNormalCanon, MvNormalCanon(3, 2.0), zeros(3), 0.5 * eye(3)), (DiagNormalCanon, MvNormalCanon(h, dv), h ./ dv, diagm(1.0 ./ dv)), (ZeroMeanDiagNormalCanon, MvNormalCanon(dv), zeros(3), diagm(1.0 ./ dv)), - (FullNormalCanon, MvNormalCanon(h, J), J \ h, inv(J)), + (FullNormalCanon, MvNormalCanon(h, J), J \ h, inv(J)), (ZeroMeanFullNormalCanon, MvNormalCanon(J), zeros(3), inv(J)) ] println(" testing $(distrname(g))") @@ -80,16 +81,16 @@ for (T, g, μ, Σ) in [ @test isa(g, T) @test_approx_eq mean(g) μ @test_approx_eq cov(g) Σ - test_mvnormal(g) + test_mvnormal(g) # conversion between mean form and canonical form if isa(g, MvNormal) - gc = canonform(g) + gc = canonform(g) @test isa(gc, MvNormalCanon) @test length(gc) == length(g) @test_approx_eq mean(gc) mean(g) @test_approx_eq cov(gc) cov(g) - else + else @assert isa(g, MvNormalCanon) gc = meanform(g) @test isa(gc, MvNormal) @@ -116,7 +117,7 @@ function _gauss_mle(x::Matrix{Float64}, w::Vector{Float64}) mu = (x * w) * (1/sw) z = x .- mu C = (z * scale(w, z')) * (1/sw) - Base.LinAlg.copytri!(C, 'U') + Base.LinAlg.copytri!(C, 'U') return mu, C end @@ -161,5 +162,3 @@ g = fit_mle(DiagNormal, x, w) @test isa(g, DiagNormal) @test_approx_eq g.μ uw @test_approx_eq g.Σ.diag diag(Cw) - - diff --git a/test/mvtdist.jl b/test/mvtdist.jl index ed4973f86..fcd08962c 100644 --- a/test/mvtdist.jl +++ b/test/mvtdist.jl @@ -23,4 +23,5 @@ df = [1., 2, 3, 5, 10] for i = 1:length(df) d = MvTDist(df[i], mu, Sigma) @test_approx_eq_eps logpdf(d, [-2., 3]) rvalues[i] 1.0e-8 + @test d == typeof(d)(params(d)...) end diff --git a/test/noncentralhypergeometric.jl b/test/noncentralhypergeometric.jl index dc7bdd0db..f694d0c6a 100644 --- a/test/noncentralhypergeometric.jl +++ b/test/noncentralhypergeometric.jl @@ -9,6 +9,7 @@ n = 100 # http://en.wikipedia.org/wiki/Fisher's_noncentral_hypergeometric_distribution ω = 10.0 d = FisherNoncentralHypergeometric(ns, nf, n, ω) +@test d == typeof(d)(params(d)...) @test_approx_eq_eps mean(d) 71.95759 1e-5 @test mode(d) == 72 @@ -50,6 +51,7 @@ n = 100 # http://en.wikipedia.org/wiki/Fisher's_noncentral_hypergeometric_distribution ω = 10.0 d = WalleniusNoncentralHypergeometric(ns, nf, n, ω) +@test d == typeof(d)(params(d)...) @test_approx_eq_eps mean(d) 78.82945 1e-5 @test mode(d) == 80 @@ -81,4 +83,4 @@ ref = Hypergeometric(ns,nf,n) @test_approx_eq_eps cdf(d, 51) cdf(ref, 51) 1e-7 @test_approx_eq_eps quantile(d, 0.05) quantile(ref, 0.05) 1e-7 @test_approx_eq_eps quantile(d, 0.95) quantile(ref, 0.95) 1e-7 -@test mode(d) == mode(ref) \ No newline at end of file +@test mode(d) == mode(ref) diff --git a/test/vonmisesfisher.jl b/test/vonmisesfisher.jl index c01a2b439..8ffda1db0 100644 --- a/test/vonmisesfisher.jl +++ b/test/vonmisesfisher.jl @@ -23,6 +23,7 @@ function test_vonmisesfisher(p::Int, κ::Float64, n::Int, ns::Int) @test length(d) == p @test meandir(d) == μ @test concentration(d) == κ + @test d == typeof(d)(params(d)...) # println(d) θ = κ * μ @@ -72,4 +73,3 @@ for (p, κ) in [(2, 1.0), test_vonmisesfisher(p, κ, n, ns) end -