From 0c01498dd4459eea9f8cb7dc247eebd9c921ee12 Mon Sep 17 00:00:00 2001 From: Douglas Bates Date: Fri, 4 Aug 2023 13:44:46 -0500 Subject: [PATCH 01/14] Add method to utility. Add correlations to pbtbl. --- src/bootstrap.jl | 169 ++++++++++++++++++++++++++++++++++++++- src/profile/utilities.jl | 4 +- 2 files changed, 171 insertions(+), 2 deletions(-) diff --git a/src/bootstrap.jl b/src/bootstrap.jl index 5161a6468..62f37c62e 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -521,6 +521,7 @@ end """ tidyσs(bsamp::MixedModelFitCollection) + Return a tidy (row)table with the estimates of the variance components (on the standard deviation scale) spread into columns of `iter`, `group`, `column` and `σ`. """ @@ -544,8 +545,9 @@ function tidyσs(bsamp::MixedModelFitCollection{T}) where {T} return result end +#= function pbstrtbl(bsamp::MixedModelFitCollection{T}) where {T} - (; fits, λ, inds) = bsamp + (; fits, λ) = bsamp row1 = first(fits) cnms = [:obj, :σ] pos = Dict{Symbol,UnitRange{Int}}(:obj => 1:1, :σ => 2:2) @@ -581,3 +583,168 @@ function pbstrtbl(bsamp::MixedModelFitCollection{T}) where {T} end return val end +=# + +""" + extractpars!(tup::NamedTuple, bsamp::MixedModelBootstrap{T}) where {T} + +Extract the named parameters from the `i`'th element of `bsamp` into `tup` +""" +function extractpars!(tup::NamedTuple, bsamp::MixedModelBootstrap{T}, i::Integer) where {T} + (; fits, λ) = bsamp + fiti = fits[i] # let it throw a BoundsError if i is outside the range + for (k, v) in pairs(tup) + if k == :obj + v[] = fiti.objective + elseif k == :iter + v[] = i + elseif k == :σ + v[] = coalesce(fiti.σ, one(T)) + elseif k == :β + copyto!(v, fiti.β) + elseif k == :θ + copyto!(v, fiti.θ) + elseif k == :σs + setθ!(bsamp, i) + vpos = 1 + σ = coalesce(fiti.σ, one(T)) + for l in λ + for λr in eachrow(l) + v[vpos] = σ * norm(λr) + vpos += 1 + end + end + else + throw(ArgumentError(string("Unknown extraction name: ", k))) + end + end + return tup +end + +_size1(m) = (first ∘ size)(m) + +_nrho(m) = (kchoose2 ∘ _size1)(m) + +function _appendsym!(syms::AbstractVector{Symbol}, dict::Dict{Symbol,UnitRange{Int}}, sym::Symbol, len::Integer) + lenp1 = length(syms) + 1 + append!(syms, _generatesyms(first(string(sym)), len)) + dict[sym] = lenp1:(length(syms)) + return syms +end + +function _prototypevec(bsamp::MixedModelBootstrap) + (; fits, λ) = bsamp + (; β, θ) = first(fits) + dict = Dict{Symbol,UnitRange{Int}}(:obj => 1:1, :σ => 2:2) + syms = [:obj, :σ] + _appendsym!(syms, dict, :β, length(β)) + _appendsym!(syms, dict, :σs, sum(_size1, λ)) + _appendsym!(syms, dict, :ρ, sum(_nrho, λ)) + _appendsym!(syms, dict, :θ, length(θ)) + return Tuple(syms), dict +end + +function _allpars!(v::AbstractVector{T}, bsamp::MixedModelBootstrap{T}, i::Integer, d::Dict{Symbol,UnitRange{Int}}) where {T} + fiti = bsamp.fits[i] + λ = bsamp.λ + v[1] = fiti.objective + v[2] = σ = coalesce(fiti.σ, one(T)) + copyto!(view(v, d[:β]), fiti.β) + copyto!(view(v, d[:θ]), fiti.θ) + k = first(d[:σs]) + setθ!(bsamp, i) + for l in λ + for λr in eachrow(l.data) + v[k] = σ * norm(λr) + k += 1 + end + end + drho = d[:ρ] + if !isempty(drho) + fill!(view(v, drho), zero(T)) + k = first(drho) + for ll in λ + lam = ll.data + ii = _size1(lam) + isone(ii) && continue + if isa(lam, Diagonal) + k += _nrho(lam) + else + for i in 1:ii + ri = normalize!(view(lam, i, :)) + for j in 1:(i - 1) + v[k] = dot(ri, view(lam, j, :)) + k += 1 + end + end + end + end + end + return v +end + +function pbstrtbl(bsamp::MixedModelFitCollection{T}) where {T} + (; fits, λ) = bsamp + syms, d = _prototypevec(bsamp) + nsym = length(syms) + val = NamedTuple{syms, NTuple{nsym, T}}[] + v = Vector{T}(undef, nsym) + for i in axes(fits, 1) + push!(val, NamedTuple{syms, NTuple{nsym, T}}(_allpars!(v, bsamp, i, d))) + end + return val +end + +""" + _rowlengths(v::AbstractVector, λ::Diagonal) + +Copy the row lengths (absolute value of the diagonal elements) of `λ`` into `v` +""" +function _rowlengths(v::AbstractVector{T}, λ::Diagonal{T}) where {T} + v .= abs.(λ.diag) + return v +end + +function _rowlengths(v::AbstractVector{T}, λ::LowerTriangular{T}) where {T} + dat = λ.data + k = size(dat, 1) + fill!(v, zero(T)) + v[1] = abs(first(dat)) + for i in 2:k + accum = zero(T) + for j in 1:i + accum += abs2(dat[i, j]) + end + v[i] = sqrt(accum) + end + return v +end + +""" + _rowdotprods(v::AbstractVector{T}, λ) + +Evaluate the dot products of rows of `λ` into v. The dot products are stored in column-major order of the upper triangle. +""" +function _rowdotprods(v::AbstractVector{T}, λ::Diagonal{T}) where {T} + fill!(v, zero(T)) + return v +end + +function _rowdotprods(v::AbstractVector{T}, λ::LowerTriangular{T}) where {T} + fill!(v, zero(T)) + dat = λ.data + k = size(dat, 1) + l = 1 + for i in 2:k + for j in 1:(i - 1) + accum = zero(T) + for ii in 1:j + accum += dat[i, ii] * dat[j, ii] + end + v[l] = accum + l += 1 + end + end + return v +end + diff --git a/src/profile/utilities.jl b/src/profile/utilities.jl index 27f63c4a0..f5a6963ac 100644 --- a/src/profile/utilities.jl +++ b/src/profile/utilities.jl @@ -24,10 +24,12 @@ Utility to generate a vector of Symbols of the form : from a tag and The indices are left-padded with zeros to allow lexicographic sorting. """ -function _generatesyms(tag::Char, len::Integer) +function _generatesyms(tag::AbstractString, len::Integer) return Symbol.(string.(tag, lpad.(Base.OneTo(len), ndigits(len), '0'))) end +_generatesyms(tag::Char, len::Integer) = _generatesyms(string(tag), len) + function TableColumns(m::LinearMixedModel{T}) where {T} nmvec = [:ζ] positions = Dict(:ζ => 1:1) From 0050e7b53c97dd8c667fff9d1f28658b4fe04154 Mon Sep 17 00:00:00 2001 From: Douglas Bates Date: Fri, 4 Aug 2023 13:51:05 -0500 Subject: [PATCH 02/14] Remove vestigial code --- src/bootstrap.jl | 76 ------------------------------------------------ 1 file changed, 76 deletions(-) diff --git a/src/bootstrap.jl b/src/bootstrap.jl index 62f37c62e..5252121c7 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -545,82 +545,6 @@ function tidyσs(bsamp::MixedModelFitCollection{T}) where {T} return result end -#= -function pbstrtbl(bsamp::MixedModelFitCollection{T}) where {T} - (; fits, λ) = bsamp - row1 = first(fits) - cnms = [:obj, :σ] - pos = Dict{Symbol,UnitRange{Int}}(:obj => 1:1, :σ => 2:2) - βsz = length(row1.β) - append!(cnms, _generatesyms('β', βsz)) - lastpos = 2 + βsz - pos[:β] = 3:lastpos - σsz = sum(m -> size(m, 1), bsamp.λ) - append!(cnms, _generatesyms('σ', σsz)) - pos[:σs] = (lastpos + 1):(lastpos + σsz) - lastpos += σsz - θsz = length(row1.θ) - append!(cnms, _generatesyms('θ', θsz)) - pos[:θ] = (lastpos + 1):(lastpos + θsz) - tblrowtyp = NamedTuple{(cnms...,),NTuple{length(cnms),T}} - val = sizehint!(tblrowtyp[], length(bsamp.fits)) - v = Vector{T}(undef, length(cnms)) - for (i, r) in enumerate(bsamp.fits) - v[1] = r.objective - v[2] = coalesce(r.σ, one(T)) - copyto!(view(v, pos[:β]), r.β) - fill!(view(v, pos[:σs]), zero(T)) - copyto!(view(v, pos[:θ]), r.θ) - setθ!(bsamp, i) - vpos = first(pos[:σs]) - for l in λ - for λr in eachrow(l) - v[vpos] = r.σ * norm(λr) - vpos += 1 - end - end - push!(val, tblrowtyp(v)) - end - return val -end -=# - -""" - extractpars!(tup::NamedTuple, bsamp::MixedModelBootstrap{T}) where {T} - -Extract the named parameters from the `i`'th element of `bsamp` into `tup` -""" -function extractpars!(tup::NamedTuple, bsamp::MixedModelBootstrap{T}, i::Integer) where {T} - (; fits, λ) = bsamp - fiti = fits[i] # let it throw a BoundsError if i is outside the range - for (k, v) in pairs(tup) - if k == :obj - v[] = fiti.objective - elseif k == :iter - v[] = i - elseif k == :σ - v[] = coalesce(fiti.σ, one(T)) - elseif k == :β - copyto!(v, fiti.β) - elseif k == :θ - copyto!(v, fiti.θ) - elseif k == :σs - setθ!(bsamp, i) - vpos = 1 - σ = coalesce(fiti.σ, one(T)) - for l in λ - for λr in eachrow(l) - v[vpos] = σ * norm(λr) - vpos += 1 - end - end - else - throw(ArgumentError(string("Unknown extraction name: ", k))) - end - end - return tup -end - _size1(m) = (first ∘ size)(m) _nrho(m) = (kchoose2 ∘ _size1)(m) From 758645d8a61f85bf3078272bd30ebb45d45774f6 Mon Sep 17 00:00:00 2001 From: Douglas Bates Date: Fri, 4 Aug 2023 17:09:37 -0500 Subject: [PATCH 03/14] Reformat and fix thinkos --- src/bootstrap.jl | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/src/bootstrap.jl b/src/bootstrap.jl index 5252121c7..78e7baaa1 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -549,7 +549,12 @@ _size1(m) = (first ∘ size)(m) _nrho(m) = (kchoose2 ∘ _size1)(m) -function _appendsym!(syms::AbstractVector{Symbol}, dict::Dict{Symbol,UnitRange{Int}}, sym::Symbol, len::Integer) +function _appendsym!( + syms::AbstractVector{Symbol}, + dict::Dict{Symbol,UnitRange{Int}}, + sym::Symbol, + len::Integer, +) lenp1 = length(syms) + 1 append!(syms, _generatesyms(first(string(sym)), len)) dict[sym] = lenp1:(length(syms)) @@ -568,7 +573,12 @@ function _prototypevec(bsamp::MixedModelBootstrap) return Tuple(syms), dict end -function _allpars!(v::AbstractVector{T}, bsamp::MixedModelBootstrap{T}, i::Integer, d::Dict{Symbol,UnitRange{Int}}) where {T} +function _allpars!( + v::AbstractVector{T}, + bsamp::MixedModelBootstrap{T}, + i::Integer, + d::Dict{Symbol,UnitRange{Int}}, +) where {T} fiti = bsamp.fits[i] λ = bsamp.λ v[1] = fiti.objective @@ -577,23 +587,28 @@ function _allpars!(v::AbstractVector{T}, bsamp::MixedModelBootstrap{T}, i::Integ copyto!(view(v, d[:θ]), fiti.θ) k = first(d[:σs]) setθ!(bsamp, i) - for l in λ - for λr in eachrow(l.data) - v[k] = σ * norm(λr) + if isa(λ, Diagonal) + for d in λ.diag + v[k] = σ * d k += 1 end + elseif isa(λ, LowerTriangular) + for l in λ + for λr in eachrow(l.data) + v[k] = σ * norm(λr) + k += 1 + end + end end drho = d[:ρ] if !isempty(drho) fill!(view(v, drho), zero(T)) k = first(drho) for ll in λ - lam = ll.data - ii = _size1(lam) - isone(ii) && continue - if isa(lam, Diagonal) - k += _nrho(lam) - else + if isa(ll, LowerTriangular{T}) + lam = ll.data + ii = _size1(lam) + isone(ii) && continue for i in 1:ii ri = normalize!(view(lam, i, :)) for j in 1:(i - 1) @@ -611,10 +626,10 @@ function pbstrtbl(bsamp::MixedModelFitCollection{T}) where {T} (; fits, λ) = bsamp syms, d = _prototypevec(bsamp) nsym = length(syms) - val = NamedTuple{syms, NTuple{nsym, T}}[] + val = NamedTuple{syms,NTuple{nsym,T}}[] v = Vector{T}(undef, nsym) for i in axes(fits, 1) - push!(val, NamedTuple{syms, NTuple{nsym, T}}(_allpars!(v, bsamp, i, d))) + push!(val, NamedTuple{syms,NTuple{nsym,T}}(_allpars!(v, bsamp, i, d))) end return val end @@ -668,7 +683,6 @@ function _rowdotprods(v::AbstractVector{T}, λ::LowerTriangular{T}) where {T} v[l] = accum l += 1 end - end + end return v end - From b4002c1a814c450f53d6c3fb9d3c8f49e4efb5a6 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Wed, 9 Aug 2023 16:51:36 -0400 Subject: [PATCH 04/14] typo --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index c3019459f..0dd81bc47 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ MixedModels v4.17.0 Release Notes ============================== -* New kwarg `amalgamate` can be used to disable amalgation of random0effects terms sharing a single grouping variable. Generally, `amalgamate=false` will result in a slower fit, but may improve convergence in some pathological cases. Note that this feature is experimental and changes to it are **not** considered breakin. [#673] +* New kwarg `amalgamate` can be used to disable amalgation of random0effects terms sharing a single grouping variable. Generally, `amalgamate=false` will result in a slower fit, but may improve convergence in some pathological cases. Note that this feature is experimental and changes to it are **not** considered breaking. [#673] * More informative error messages when passing a `Distribution` or `Link` type instead of the desired instance. [#698] * More informative error message on the intentional decision not to define methods for the coefficient of determination. [#698] * **EXPERIMENTAL** Return `finitial` when PIRLS drifts into a portion of the parameter space that yields a (numerically) invalid covariance matrix. This recovery strategy may be removed in a future release. [#616] From 8103602094acf0b3cd6de26a561815d8cc08ed0a Mon Sep 17 00:00:00 2001 From: Douglas Bates Date: Fri, 25 Aug 2023 07:53:17 -0500 Subject: [PATCH 05/14] bootstrap table as row table --- src/bootstrap.jl | 204 +++++++++++++++++++++-------------------------- 1 file changed, 89 insertions(+), 115 deletions(-) diff --git a/src/bootstrap.jl b/src/bootstrap.jl index 78e7baaa1..3d672ec72 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -253,7 +253,7 @@ of `iter`, `type`, `group`, `name` and `value`. a breaking change. """ function allpars(bsamp::MixedModelFitCollection{T}) where {T} - fits, λ, fcnames = bsamp.fits, bsamp.λ, bsamp.fcnames + (; fits, λ, fcnames) = bsamp npars = 2 + length(first(fits).β) + sum(map(k -> (k * (k + 1)) >> 1, size.(bsamp.λ, 2))) nresrow = length(fits) * npars cols = ( @@ -392,12 +392,12 @@ function Base.propertynames(bsamp::MixedModelFitCollection) end """ + setθ!(bsamp::MixedModelFitCollection, θ::AbstractVector) setθ!(bsamp::MixedModelFitCollection, i::Integer) Install the values of the i'th θ value of `bsamp.fits` in `bsamp.λ` """ -function setθ!(bsamp::MixedModelFitCollection, i::Integer) - θ = bsamp.fits[i].θ +function setθ!(bsamp::MixedModelFitCollection{T}, θ::AbstractVector{T}) where {T} offset = 0 for (λ, inds) in zip(bsamp.λ, bsamp.inds) λdat = _getdata(λ) @@ -410,6 +410,10 @@ function setθ!(bsamp::MixedModelFitCollection, i::Integer) return bsamp end +function setθ!(bsamp::MixedModelFitCollection, i::Integer) + return setθ!(bsamp, bsamp.θ[i]) +end + _getdata(x::Diagonal) = x _getdata(x::LowerTriangular) = x.data @@ -545,144 +549,114 @@ function tidyσs(bsamp::MixedModelFitCollection{T}) where {T} return result end -_size1(m) = (first ∘ size)(m) +_nρ(d::Diagonal) = 0 +_nρ(t::LowerTriangular) = kchoose2(size(t.data, 1)) -_nrho(m) = (kchoose2 ∘ _size1)(m) - -function _appendsym!( - syms::AbstractVector{Symbol}, - dict::Dict{Symbol,UnitRange{Int}}, - sym::Symbol, - len::Integer, -) - lenp1 = length(syms) + 1 - append!(syms, _generatesyms(first(string(sym)), len)) - dict[sym] = lenp1:(length(syms)) - return syms +function σρnms(λ) + σsyms = _generatesyms('σ', sum(first ∘ size, λ)) + ρsyms = _generatesyms('ρ', sum(_nρ, λ)) + val = Symbol[] + for l in λ + for _ in axes(l, 1) + push!(val, popfirst!(σsyms)) + end + for _ in 1:_nρ(l) + push!(val, popfirst!(ρsyms)) + end + end + return val end -function _prototypevec(bsamp::MixedModelBootstrap) +""" + _offsets(bsamp::MixedModelBootstrap) + +Return a Tuple{4,Int} of offsets for β, σρ, and θ values in the table columns + +The initial `2` is for the `:obj` and `:σ` columns. The last element is the total number of columns. +""" +function _offsets(bsamp::MixedModelBootstrap) (; fits, λ) = bsamp (; β, θ) = first(fits) - dict = Dict{Symbol,UnitRange{Int}}(:obj => 1:1, :σ => 2:2) + σρ = σρnms(λ) + lβ = length(β) + lθ = length(θ) syms = [:obj, :σ] - _appendsym!(syms, dict, :β, length(β)) - _appendsym!(syms, dict, :σs, sum(_size1, λ)) - _appendsym!(syms, dict, :ρ, sum(_nrho, λ)) - _appendsym!(syms, dict, :θ, length(θ)) - return Tuple(syms), dict + append!(syms, _generatesyms('β', lβ)) + append!(syms, σρnms(λ)) + append!(syms, _generatesyms('θ', lθ)) + return Tuple(syms), cumsum((2, lβ, length(σρ), lθ)) +end + +function σρ!(v::AbstractVector, off::Integer, d::Diagonal, σ) + diag = d.diag + diag *= σ + return copyto!(v, off + 1, d.diag) +end + +function σρ!(v::AbstractVector{T}, off::Integer, t::LowerTriangular{T}, σ::T) where {T} + dat = t.data + for i in axes(dat, 1) + ssqr = zero(T) + for j in 1:i + ssqr += abs2(dat[i, j]) + end + len = sqrt(ssqr) + v[off += 1] = σ * len + if len > 0 + for j in 1:i + dat[i, j] /= len + end + end + end + for i in axes(dat, 1) + for j in 1:(i - 1) + s = zero(T) + for k in 1:i + s += dat[i, k] * dat[j, k] + end + v[off += 1] = s + end + end + return v end function _allpars!( v::AbstractVector{T}, bsamp::MixedModelBootstrap{T}, i::Integer, - d::Dict{Symbol,UnitRange{Int}}, + offsets::NTuple{4,Int}, ) where {T} fiti = bsamp.fits[i] + setθ!(bsamp, i) λ = bsamp.λ v[1] = fiti.objective v[2] = σ = coalesce(fiti.σ, one(T)) - copyto!(view(v, d[:β]), fiti.β) - copyto!(view(v, d[:θ]), fiti.θ) - k = first(d[:σs]) - setθ!(bsamp, i) - if isa(λ, Diagonal) - for d in λ.diag - v[k] = σ * d - k += 1 - end - elseif isa(λ, LowerTriangular) - for l in λ - for λr in eachrow(l.data) - v[k] = σ * norm(λr) - k += 1 - end - end + off = 2 + for b in fiti.β + v[off += 1] = b end - drho = d[:ρ] - if !isempty(drho) - fill!(view(v, drho), zero(T)) - k = first(drho) - for ll in λ - if isa(ll, LowerTriangular{T}) - lam = ll.data - ii = _size1(lam) - isone(ii) && continue - for i in 1:ii - ri = normalize!(view(lam, i, :)) - for j in 1:(i - 1) - v[k] = dot(ri, view(lam, j, :)) - k += 1 - end - end - end - end +# copyto!(v, 3, fiti.β) + off = offsets[2] + for lam in λ + σρ!(v, off, lam, σ) + off += size(lam, 1) + _nρ(lam) + end + off = offsets[3] + for t in fiti.θ + v[off += 1] = t end +# copyto!(v, offsets[3] + 1, fiti.θ) return v end function pbstrtbl(bsamp::MixedModelFitCollection{T}) where {T} - (; fits, λ) = bsamp - syms, d = _prototypevec(bsamp) + fits = bsamp.fits + syms, offsets = _offsets(bsamp) nsym = length(syms) val = NamedTuple{syms,NTuple{nsym,T}}[] - v = Vector{T}(undef, nsym) + v = sizehint!(Vector{T}(undef, nsym), size(fits, 1)) for i in axes(fits, 1) - push!(val, NamedTuple{syms,NTuple{nsym,T}}(_allpars!(v, bsamp, i, d))) + push!(val, NamedTuple{syms,NTuple{nsym,T}}(_allpars!(v, bsamp, i, offsets))) end return val end - -""" - _rowlengths(v::AbstractVector, λ::Diagonal) - -Copy the row lengths (absolute value of the diagonal elements) of `λ`` into `v` -""" -function _rowlengths(v::AbstractVector{T}, λ::Diagonal{T}) where {T} - v .= abs.(λ.diag) - return v -end - -function _rowlengths(v::AbstractVector{T}, λ::LowerTriangular{T}) where {T} - dat = λ.data - k = size(dat, 1) - fill!(v, zero(T)) - v[1] = abs(first(dat)) - for i in 2:k - accum = zero(T) - for j in 1:i - accum += abs2(dat[i, j]) - end - v[i] = sqrt(accum) - end - return v -end - -""" - _rowdotprods(v::AbstractVector{T}, λ) - -Evaluate the dot products of rows of `λ` into v. The dot products are stored in column-major order of the upper triangle. -""" -function _rowdotprods(v::AbstractVector{T}, λ::Diagonal{T}) where {T} - fill!(v, zero(T)) - return v -end - -function _rowdotprods(v::AbstractVector{T}, λ::LowerTriangular{T}) where {T} - fill!(v, zero(T)) - dat = λ.data - k = size(dat, 1) - l = 1 - for i in 2:k - for j in 1:(i - 1) - accum = zero(T) - for ii in 1:j - accum += dat[i, ii] * dat[j, ii] - end - v[l] = accum - l += 1 - end - end - return v -end From c67d09d3ca1f9384ee8ae1cdf7707f8436b597b5 Mon Sep 17 00:00:00 2001 From: Douglas Bates Date: Fri, 25 Aug 2023 08:21:27 -0500 Subject: [PATCH 06/14] Suggestions from Juliaformatter --- src/bootstrap.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/bootstrap.jl b/src/bootstrap.jl index 3d672ec72..e817dc9fe 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -604,7 +604,7 @@ function σρ!(v::AbstractVector{T}, off::Integer, t::LowerTriangular{T}, σ::T) v[off += 1] = σ * len if len > 0 for j in 1:i - dat[i, j] /= len + dat[i, j] /= len end end end @@ -624,7 +624,7 @@ function _allpars!( v::AbstractVector{T}, bsamp::MixedModelBootstrap{T}, i::Integer, - offsets::NTuple{4,Int}, + offsets::NTuple{4,Int} ) where {T} fiti = bsamp.fits[i] setθ!(bsamp, i) @@ -635,7 +635,7 @@ function _allpars!( for b in fiti.β v[off += 1] = b end -# copyto!(v, 3, fiti.β) + # copyto!(v, 3, fiti.β) off = offsets[2] for lam in λ σρ!(v, off, lam, σ) @@ -645,7 +645,7 @@ function _allpars!( for t in fiti.θ v[off += 1] = t end -# copyto!(v, offsets[3] + 1, fiti.θ) + # copyto!(v, offsets[3] + 1, fiti.θ) return v end From 6926fa00a5b227039bc70177ee0ea20c90d54c3f Mon Sep 17 00:00:00 2001 From: Douglas Bates Date: Fri, 25 Aug 2023 12:13:39 -0500 Subject: [PATCH 07/14] Much faster to use a matrix. Needs tests. --- src/bootstrap.jl | 86 +++++++++++++++--------------------------------- 1 file changed, 27 insertions(+), 59 deletions(-) diff --git a/src/bootstrap.jl b/src/bootstrap.jl index e817dc9fe..a4367a783 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -567,33 +567,21 @@ function σρnms(λ) return val end -""" - _offsets(bsamp::MixedModelBootstrap) - -Return a Tuple{4,Int} of offsets for β, σρ, and θ values in the table columns - -The initial `2` is for the `:obj` and `:σ` columns. The last element is the total number of columns. -""" -function _offsets(bsamp::MixedModelBootstrap) +function _syms(bsamp::MixedModelBootstrap) (; fits, λ) = bsamp (; β, θ) = first(fits) - σρ = σρnms(λ) - lβ = length(β) - lθ = length(θ) - syms = [:obj, :σ] - append!(syms, _generatesyms('β', lβ)) + syms = [:obj] + append!(syms, _generatesyms('β', length(β))) + push!(syms, :σ) append!(syms, σρnms(λ)) - append!(syms, _generatesyms('θ', lθ)) - return Tuple(syms), cumsum((2, lβ, length(σρ), lθ)) + return append!(syms, _generatesyms('θ', length(θ))) end -function σρ!(v::AbstractVector, off::Integer, d::Diagonal, σ) - diag = d.diag - diag *= σ - return copyto!(v, off + 1, d.diag) +function σρ!(v::AbstractVector, d::Diagonal, σ) + return append!(v, σ .* d.diag) end -function σρ!(v::AbstractVector{T}, off::Integer, t::LowerTriangular{T}, σ::T) where {T} +function σρ!(v::AbstractVector{T}, t::LowerTriangular{T}, σ::T) where {T} dat = t.data for i in axes(dat, 1) ssqr = zero(T) @@ -601,7 +589,7 @@ function σρ!(v::AbstractVector{T}, off::Integer, t::LowerTriangular{T}, σ::T) ssqr += abs2(dat[i, j]) end len = sqrt(ssqr) - v[off += 1] = σ * len + push!(v, σ * len) if len > 0 for j in 1:i dat[i, j] /= len @@ -614,49 +602,29 @@ function σρ!(v::AbstractVector{T}, off::Integer, t::LowerTriangular{T}, σ::T) for k in 1:i s += dat[i, k] * dat[j, k] end - v[off += 1] = s + push!(v, s) end end return v end -function _allpars!( - v::AbstractVector{T}, - bsamp::MixedModelBootstrap{T}, - i::Integer, - offsets::NTuple{4,Int} -) where {T} - fiti = bsamp.fits[i] - setθ!(bsamp, i) - λ = bsamp.λ - v[1] = fiti.objective - v[2] = σ = coalesce(fiti.σ, one(T)) - off = 2 - for b in fiti.β - v[off += 1] = b - end - # copyto!(v, 3, fiti.β) - off = offsets[2] - for lam in λ - σρ!(v, off, lam, σ) - off += size(lam, 1) + _nρ(lam) - end - off = offsets[3] - for t in fiti.θ - v[off += 1] = t - end - # copyto!(v, offsets[3] + 1, fiti.θ) - return v -end - function pbstrtbl(bsamp::MixedModelFitCollection{T}) where {T} - fits = bsamp.fits - syms, offsets = _offsets(bsamp) - nsym = length(syms) - val = NamedTuple{syms,NTuple{nsym,T}}[] - v = sizehint!(Vector{T}(undef, nsym), size(fits, 1)) - for i in axes(fits, 1) - push!(val, NamedTuple{syms,NTuple{nsym,T}}(_allpars!(v, bsamp, i, offsets))) + (; fits, λ) = bsamp + syms = _syms(bsamp) + m = length(syms) + n = length(fits) + v = sizehint!(T[], m * n) + for f in fits + (; β, θ, σ) = f + push!(v, f.objective) + append!(v, β) + push!(v, σ) + setθ!(bsamp, θ) + for l in λ + σρ!(v, l, σ) + end + append!(v, θ) end - return val + m = collect(transpose(reshape(v, (m, n)))) + return Table(Tables.table(m; header = syms)) end From e1e1b6df8fc91535d1df88a576c28db9eaac5559 Mon Sep 17 00:00:00 2001 From: Douglas Bates Date: Sat, 26 Aug 2023 08:57:53 -0500 Subject: [PATCH 08/14] Update src/bootstrap.jl Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/bootstrap.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bootstrap.jl b/src/bootstrap.jl index a4367a783..08695c844 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -626,5 +626,5 @@ function pbstrtbl(bsamp::MixedModelFitCollection{T}) where {T} append!(v, θ) end m = collect(transpose(reshape(v, (m, n)))) - return Table(Tables.table(m; header = syms)) + return Table(Tables.table(m; header=syms)) end From 523fd06b58b1b47010bd2a069725e76da2f392cf Mon Sep 17 00:00:00 2001 From: Douglas Bates Date: Sat, 26 Aug 2023 12:45:37 -0500 Subject: [PATCH 09/14] Add tests, avoid allocating a vector. --- src/bootstrap.jl | 5 ++++- test/bootstrap.jl | 7 ++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/bootstrap.jl b/src/bootstrap.jl index 08695c844..01643e29e 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -578,7 +578,10 @@ function _syms(bsamp::MixedModelBootstrap) end function σρ!(v::AbstractVector, d::Diagonal, σ) - return append!(v, σ .* d.diag) + for dd in d.diag + push!(v, σ * dd) + end + return v end function σρ!(v::AbstractVector{T}, t::LowerTriangular{T}, σ::T) where {T} diff --git a/test/bootstrap.jl b/test/bootstrap.jl index f787a5d0c..8bd385495 100644 --- a/test/bootstrap.jl +++ b/test/bootstrap.jl @@ -184,7 +184,12 @@ end @test pb1 == restorereplicates(seekstart(io), MixedModels.unfit!(deepcopy(m1))) @test pb1 ≈ restorereplicates(seekstart(io), m1, Float16) rtol=1 - end + # these tests must be last because the tbl extractor changes the λ field in the sample + # Some day when we have a lot of energy we may want to omit the λ field from the equality comparisons + @test isa(pb0.tbl, Table) + @test isa(pb1.tbl, Table) + @test ncol(DataFrame(pb1.β)) == 3 +end @testset "Bernoulli simulate! and GLMM bootstrap" begin contra = dataset(:contra) From d4975c4dcfbc58a1a678ad794a420bb4aa8955a2 Mon Sep 17 00:00:00 2001 From: Douglas Bates Date: Sun, 27 Aug 2023 11:04:48 -0500 Subject: [PATCH 10/14] Update docs for bootstrap table --- docs/src/bootstrap.md | 70 +++++++++++++------------------------------ 1 file changed, 20 insertions(+), 50 deletions(-) diff --git a/docs/src/bootstrap.md b/docs/src/bootstrap.md index 018a024d8..3c13ccd68 100644 --- a/docs/src/bootstrap.md +++ b/docs/src/bootstrap.md @@ -24,10 +24,6 @@ parameter, `θ`, that defines the variance-covariance matrices of the random eff For example, a simple linear mixed-effects model for the `Dyestuff` data in the [`lme4`](http://github.com/lme4/lme4) package for [`R`](https://www.r-project.org) is fit by -```@setup Main -using DisplayAs -``` - ```@example Main using DataFrames using Gadfly # plotting package @@ -38,41 +34,29 @@ using Random ```@example Main dyestuff = MixedModels.dataset(:dyestuff) m1 = fit(MixedModel, @formula(yield ~ 1 + (1 | batch)), dyestuff) -DisplayAs.Text(ans) # hide ``` -To bootstrap the model parameters, first initialize a random number generator then create a bootstrap sample +To bootstrap the model parameters, first initialize a random number generator then create a bootstrap sample and extract the `tbl` property, which is a `Table` - a lightweight type of dataframe. ```@example Main const rng = MersenneTwister(1234321); samp = parametricbootstrap(rng, 10_000, m1); -df = DataFrame(samp.allpars); -first(df, 10) +tbl = samp.tbl ``` -Especially for those with a background in [`R`](https://www.R-project.org/) or [`pandas`](https://pandas.pydata.org), -the simplest way of accessing the parameter estimates in the parametric bootstrap object is to create a `DataFrame` from the `allpars` property as shown above. - -We can use `filter` to filter out relevant rows of a dataframe. A density plot of the estimates of `σ`, the residual standard deviation, can be created as ```@example Main -σres = filter(df) do row # create a thunk that operates on rows - row.type == "σ" && row.group == "residual" # our filtering rule -end - -plot(x = σres.value, Geom.density, Guide.xlabel("Parametric bootstrap estimates of σ")) +plot(x = tbl.σ, Geom.density, Guide.xlabel("Parametric bootstrap estimates of σ")) ``` -For the estimates of the intercept parameter, the `getproperty` extractor must be used + +or, for the intercept parameter ```@example Main -plot(filter(:type => ==("β"), df), x = :value, Geom.density, Guide.xlabel("Parametric bootstrap estimates of β₁")) +plot(x = tbl.β1, Geom.density, Guide.xlabel("Parametric bootstrap estimates of β₁")) ``` A density plot of the estimates of the standard deviation of the random effects is obtained as ```@example Main -σbatch = filter(df) do row # create a thunk that operates on rows - row.type == "σ" && row.group == "batch" # our filtering rule -end -plot(x = σbatch.value, Geom.density, +plot(x = tbl.σ1, Geom.density, Guide.xlabel("Parametric bootstrap estimates of σ₁")) ``` @@ -81,7 +65,7 @@ Although this mode appears to be diffuse, this is an artifact of the way that de In fact, it is a pulse, as can be seen from a histogram. ```@example Main -plot(x = σbatch.value, Geom.histogram, +plot(x = tbl.σ1, Geom.histogram, Guide.xlabel("Parametric bootstrap estimates of σ₁")) ``` @@ -93,14 +77,9 @@ The shortest such intervals, obtained with the `shortestcovint` extractor, corre shortestcovint ``` -We generate these for all random and fixed effects: +We generate these directly from the original bootstrap object: ```@example Main -combine(groupby(df, [:type, :group, :names]), :value => shortestcovint => :interval) -``` - -We can also generate this directly from the original bootstrap object: -```@example Main -DataFrame(shortestcovint(samp)) +Table(shortestcovint(samp)) ``` A value of zero for the standard deviation of the random effects is an example of a *singular* covariance. @@ -110,48 +89,39 @@ However, it is not as straightforward to detect singularity in vector-valued ran For example, if we bootstrap a model fit to the `sleepstudy` data ```@example Main sleepstudy = MixedModels.dataset(:sleepstudy) -m2 = fit( - MixedModel, - @formula(reaction ~ 1+days+(1+days|subj)), - sleepstudy, -) -DisplayAs.Text(ans) # hide +contrasts = Dict(:subj => Grouping()) +m2 = let f = @formula reaction ~ 1+days+(1+days|subj) + fit(MixedModel, f, sleepstudy; contrasts) +end ``` ```@example Main samp2 = parametricbootstrap(rng, 10_000, m2); -df2 = DataFrame(samp2.allpars); -first(df2, 10) +tbl2 = samp2.tbl ``` the singularity can be exhibited as a standard deviation of zero or as a correlation of $\pm1$. ```@example Main -DataFrame(shortestcovint(samp2)) +shortestcovint(samp2) ``` A histogram of the estimated correlations from the bootstrap sample has a spike at `+1`. ```@example Main -ρs = filter(df2) do row - row.type == "ρ" && row.group == "subj" -end -plot(x = ρs.value, Geom.histogram, +plot(x = tbl2.ρ1, Geom.histogram, Guide.xlabel("Parametric bootstrap samples of correlation of random effects")) ``` or, as a count, ```@example Main -count(ρs.value .≈ 1) +count(tbl2.ρ1 .≈ 1) ``` Close examination of the histogram shows a few values of `-1`. ```@example Main -count(ρs.value .≈ -1) +count(tbl2.ρ1 .≈ -1) ``` Furthermore there are even a few cases where the estimate of the standard deviation of the random effect for the intercept is zero. ```@example Main -σs = filter(df2) do row - row.type == "σ" && row.group == "subj" && row.names == "(Intercept)" -end -count(σs.value .≈ 0) +count(tbl2.σ1 .≈ 0) ``` There is a general condition to check for singularity of an estimated covariance matrix or matrices in a bootstrap sample. From b7e0491beed9b009976510ad23691efcf472a780 Mon Sep 17 00:00:00 2001 From: Douglas Bates Date: Mon, 28 Aug 2023 08:26:41 -0500 Subject: [PATCH 11/14] =?UTF-8?q?copy=20and=20restore=20=CE=BB=20in=20pbst?= =?UTF-8?q?rtbl?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/bootstrap.jl | 6 +++++- test/bootstrap.jl | 11 +++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/bootstrap.jl b/src/bootstrap.jl index 01643e29e..200859f97 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -448,7 +448,7 @@ Return the shortest interval containing `level` proportion for each parameter fr a breaking change. """ function shortestcovint(bsamp::MixedModelFitCollection{T}, level=0.95) where {T} - allpars = bsamp.allpars + allpars = bsamp.allpars # probably simpler to use .tbl instead of .allpars pars = unique(zip(allpars.type, allpars.group, allpars.names)) colnms = (:type, :group, :names, :lower, :upper) @@ -613,6 +613,7 @@ end function pbstrtbl(bsamp::MixedModelFitCollection{T}) where {T} (; fits, λ) = bsamp + λcp = copy.(λ) syms = _syms(bsamp) m = length(syms) n = length(fits) @@ -629,5 +630,8 @@ function pbstrtbl(bsamp::MixedModelFitCollection{T}) where {T} append!(v, θ) end m = collect(transpose(reshape(v, (m, n)))) + for k in eachindex(λ, λcp) # restore original contents of λ + copyto!(λ[k], λcp[k]) + end return Table(Tables.table(m; header=syms)) end diff --git a/test/bootstrap.jl b/test/bootstrap.jl index 8bd385495..9797a89ee 100644 --- a/test/bootstrap.jl +++ b/test/bootstrap.jl @@ -156,7 +156,11 @@ end pb0 = quickboot(m0) pb1 = quickboot(m1) savereplicates(io, pb1) - # wrong model`` + @test isa(pb0.tbl, Table) + @test isa(pb1.tbl, Table) # create tbl here to check it doesn't modify pb1 + @test ncol(DataFrame(pb1.β)) == 3 + + # wrong model @test_throws ArgumentError restorereplicates(seekstart(io), m0) # need to specify an eltype! @test_throws MethodError restorereplicates(seekstart(io), m1, MixedModelBootstrap) @@ -184,11 +188,6 @@ end @test pb1 == restorereplicates(seekstart(io), MixedModels.unfit!(deepcopy(m1))) @test pb1 ≈ restorereplicates(seekstart(io), m1, Float16) rtol=1 - # these tests must be last because the tbl extractor changes the λ field in the sample - # Some day when we have a lot of energy we may want to omit the λ field from the equality comparisons - @test isa(pb0.tbl, Table) - @test isa(pb1.tbl, Table) - @test ncol(DataFrame(pb1.β)) == 3 end @testset "Bernoulli simulate! and GLMM bootstrap" begin From b9154b5b3012974970335da86803661aebc69f9f Mon Sep 17 00:00:00 2001 From: Douglas Bates Date: Tue, 29 Aug 2023 14:10:47 -0500 Subject: [PATCH 12/14] Incorporate suggestions from @palday --- NEWS.md | 5 +++++ docs/src/bootstrap.md | 2 +- src/bootstrap.jl | 16 +++++++++------- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/NEWS.md b/NEWS.md index 5e73643ba..4497e5bcd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +MixedModels v4.20.0 Release Notes +============================== +* The `.tbl` property of a `MixedModelBootstrap` now includes the correlation parameters for lower triangular elements of the `λ` field. [#702] + MixedModels v4.19.0 Release Notes ============================== * New method `StatsAPI.coefnames(::ReMat)` returns the coefficient names associated with each grouping factor. [#709] @@ -463,6 +467,7 @@ Package dependencies [#682]: https://github.com/JuliaStats/MixedModels.jl/issues/682 [#694]: https://github.com/JuliaStats/MixedModels.jl/issues/694 [#698]: https://github.com/JuliaStats/MixedModels.jl/issues/698 +[#702]: https://github.com/JuliaStats/MixedModels.jl/issues/702 [#703]: https://github.com/JuliaStats/MixedModels.jl/issues/703 [#707]: https://github.com/JuliaStats/MixedModels.jl/issues/707 [#709]: https://github.com/JuliaStats/MixedModels.jl/issues/709 diff --git a/docs/src/bootstrap.md b/docs/src/bootstrap.md index 3c13ccd68..dfb80b2b8 100644 --- a/docs/src/bootstrap.md +++ b/docs/src/bootstrap.md @@ -36,7 +36,7 @@ dyestuff = MixedModels.dataset(:dyestuff) m1 = fit(MixedModel, @formula(yield ~ 1 + (1 | batch)), dyestuff) ``` -To bootstrap the model parameters, first initialize a random number generator then create a bootstrap sample and extract the `tbl` property, which is a `Table` - a lightweight type of dataframe. +To bootstrap the model parameters, first initialize a random number generator then create a bootstrap sample and extract the `tbl` property, which is a `Table` - a lightweight dataframe-like object. ```@example Main const rng = MersenneTwister(1234321); diff --git a/src/bootstrap.jl b/src/bootstrap.jl index 200859f97..afcb7c712 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -448,7 +448,7 @@ Return the shortest interval containing `level` proportion for each parameter fr a breaking change. """ function shortestcovint(bsamp::MixedModelFitCollection{T}, level=0.95) where {T} - allpars = bsamp.allpars # probably simpler to use .tbl instead of .allpars + allpars = bsamp.allpars # TODO probably simpler to use .tbl instead of .allpars pars = unique(zip(allpars.type, allpars.group, allpars.names)) colnms = (:type, :group, :names, :lower, :upper) @@ -555,7 +555,7 @@ _nρ(t::LowerTriangular) = kchoose2(size(t.data, 1)) function σρnms(λ) σsyms = _generatesyms('σ', sum(first ∘ size, λ)) ρsyms = _generatesyms('ρ', sum(_nρ, λ)) - val = Symbol[] + val = sizehint!(Symbol[], length(σsyms) + length(ρsyms)) for l in λ for _ in axes(l, 1) push!(val, popfirst!(σsyms)) @@ -578,13 +578,15 @@ function _syms(bsamp::MixedModelBootstrap) end function σρ!(v::AbstractVector, d::Diagonal, σ) - for dd in d.diag - push!(v, σ * dd) - end - return v + return append!(v, σ .* d.diag) end function σρ!(v::AbstractVector{T}, t::LowerTriangular{T}, σ::T) where {T} + """ + σρ!(v, t, σ) + + push! `σ` times the row lengths (σs) and the inner products of normalized rows (ρs) of `t` onto `v` + """ dat = t.data for i in axes(dat, 1) ssqr = zero(T) @@ -629,7 +631,7 @@ function pbstrtbl(bsamp::MixedModelFitCollection{T}) where {T} end append!(v, θ) end - m = collect(transpose(reshape(v, (m, n)))) + m = permutedims(reshape(v, (m, n)), (2, 1)) # equivalent to collect(transpose(...)) for k in eachindex(λ, λcp) # restore original contents of λ copyto!(λ[k], λcp[k]) end From ea2209da4b5c452993175b6cee279de7965a5c10 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 29 Aug 2023 19:31:51 +0000 Subject: [PATCH 13/14] Update src/bootstrap.jl Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/bootstrap.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/bootstrap.jl b/src/bootstrap.jl index afcb7c712..786036ae8 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -584,7 +584,6 @@ end function σρ!(v::AbstractVector{T}, t::LowerTriangular{T}, σ::T) where {T} """ σρ!(v, t, σ) - push! `σ` times the row lengths (σs) and the inner products of normalized rows (ρs) of `t` onto `v` """ dat = t.data From 0167027f26ecede57596ed52b2428992f7549f29 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 29 Aug 2023 19:32:25 +0000 Subject: [PATCH 14/14] minor version bump --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 03e2a20a7..199bec9df 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MixedModels" uuid = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316" author = ["Phillip Alday ", "Douglas Bates ", "Jose Bayoan Santiago Calderon "] -version = "4.19.0" +version = "4.20.0" [deps] Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"