From 1754cbd9ce7265e5c8426a4a1630f70145a25220 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Fri, 10 Jun 2022 20:53:44 +0200 Subject: [PATCH 1/8] WIP --- src/scratch.jl | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 src/scratch.jl diff --git a/src/scratch.jl b/src/scratch.jl new file mode 100644 index 00000000..561ecb22 --- /dev/null +++ b/src/scratch.jl @@ -0,0 +1,20 @@ +using GLM +using DataFrames + +y = rand(10) +x = rand(10,2) +wts = rand(10) +df = DataFrame(x, :auto) +df.y = y +df.wts = wts +lm1 = lm(x,y) +lmw = lm(x,y; wts = wts) +lmf = lm(@formula(y~x1+x2), df) +lmfw = lm(@formula(y~x1+x2), df; wts = wts) +glm(x, y) + + +cooksdistance(lm) + + + From 1d778a5b37013fc3333ba7b9f43b5a25ece29f23 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 15 Jun 2022 19:04:42 +0200 Subject: [PATCH 2/8] WIP --- src/GLM.jl | 26 +++++++------- src/glmfit.jl | 58 ++++++++++++++++++++---------- src/linpred.jl | 86 ++++++++++++++++++++++++++------------------- src/lm.jl | 77 ++++++++++++++++++++++++---------------- src/scratch.jl | 65 +++++++++++++++++++++++++++++++--- test/runtests.jl | 91 +++++++++++++++++++++++++++++++++++++++++++----- 6 files changed, 294 insertions(+), 109 deletions(-) diff --git a/src/GLM.jl b/src/GLM.jl index 019f80e3..8625a48b 100644 --- a/src/GLM.jl +++ b/src/GLM.jl @@ -12,14 +12,14 @@ module GLM import Statistics: cor import StatsBase: coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual, loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict, - fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², PValue + fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², PValue, weights import StatsFuns: xlogy import SpecialFunctions: erfc, erfcinv, digamma, trigamma import StatsModels: hasintercept export coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual, loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict, fitted, fit, fit!, model_response, response, modelmatrix, r2, r², adjr2, adjr², - cooksdistance, hasintercept + cooksdistance, hasintercept, weights, AnalyticWeights, ProbabilityWeights, FrequencyWeights export # types @@ -52,17 +52,17 @@ module GLM LinearModel, # functions - canonicallink, # canonical link function for a distribution - deviance, # deviance of fitted and observed responses - devresid, # vector of squared deviance residuals - formula, # extract the formula from a model - glm, # general interface - linpred, # linear predictor - lm, # linear model - negbin, # interface to fitting negative binomial regression - nobs, # total number of observations - predict, # make predictions - ftest # compare models with an F test + canonicallink, # canonical link function for a distribution + deviance, # deviance of fitted and observed responses + devresid, # vector of squared deviance residuals + formula, # extract the formula from a model + glm, # general interface + linpred, # linear predictor + lm, # linear model + negbin, # interface to fitting negative binomial regression + nobs, # total number of observations + predict, # make predictions + ftest # compare models with an F test const FP = AbstractFloat const FPVector{T<:FP} = AbstractArray{T,1} diff --git a/src/glmfit.jl b/src/glmfit.jl index 0c108296..942ffc18 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -3,7 +3,7 @@ The response vector and various derived vectors in a generalized linear model. """ -struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link} <: ModResp +struct GlmResp{V<:FPVector, D<:UnivariateDistribution,L<:Link,W<:AbstractWeights{<:Real}} <: ModResp "`y`: response vector" y::V d::D @@ -18,14 +18,14 @@ struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link} <: ModResp "`offset:` offset added to `Xβ` to form `eta`. Can be of length 0" offset::V "`wts:` prior case weights. Can be of length 0." - wts::V + wts::W "`wrkwt`: working case weights for the Iteratively Reweighted Least Squares (IRLS) algorithm" wrkwt::V "`wrkresid`: working residuals for IRLS" wrkresid::V end -function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V) where {V<:FPVector, D, L} +function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVector, D, L, W} n = length(y) nη = length(η) nμ = length(μ) @@ -48,14 +48,23 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V) where {V<:FPVec throw(DimensionMismatch("offset must have length $n or length 0 but was $lo")) end - return GlmResp{V,D,L}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y)) + return GlmResp{V,D,L,W}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y)) end -function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::FPVector) +function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::AbstractVector{<:Real}) # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly _y = convert(Vector{float(eltype(y))}, y) _off = convert(Vector{float(eltype(off))}, off) - _wts = convert(Vector{float(eltype(wts))}, wts) + _wts = if wts === nothing + ## This should be removed - here for allowing + ## passing a vector (deprecated) + aweights(similar(_y, 0)) + elseif isa(wts, AbstractWeights) + wts + elseif isa(wts, AbstractVector) + ## for backward compatibility + fweights(wts) + end η = similar(_y) μ = similar(_y) r = GlmResp(_y, d, l, η, μ, _off, _wts) @@ -64,13 +73,12 @@ function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::FPVe return r end -function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, - wts::AbstractVector{<:Real}) where {D, L} - GlmResp(float(y), d, l, float(off), float(wts)) +function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, wts::AbstractVector{<:Real}) where {D, L} + GlmResp(float(y), d, l, float(off), wts) end deviance(r::GlmResp) = sum(r.devresid) - +weights(r::GlmResp) = r.wts """ cancancel(r::GlmResp{V,D,L}) @@ -374,7 +382,7 @@ function StatsBase.fit!(m::AbstractGLM; if haskey(kwargs, :tol) Base.depwarn("`tol` argument is deprecated, use `atol` and `rtol` instead", :fit!) rtol = kwargs[:tol] - end + end _fit!(m, verbose, maxiter, minstepfac, atol, rtol, start) end @@ -440,12 +448,9 @@ const FIT_GLM_DOC = """ # Keyword Arguments - `dofit::Bool=true`: Determines whether model will be fit - - `wts::Vector=similar(y,0)`: Prior frequency (a.k.a. case) weights of observations. - Such weights are equivalent to repeating each observation a number of times equal - to its weight. Do note that this interpretation gives equal point estimates but - different standard errors from analytical (a.k.a. inverse variance) weights and - from probability (a.k.a. sampling) weights which are the default in some other - software. + - `wts::AbstractWeights=aweights(similar(y,0))`: Weights of observations. + Allowed weights are `AnalyticalWeights`, `FrequencyWeights`, or `ProbabilityWeights`. + If a vector is passed (deprecated) it is coerced to FrequencyWeights. Can be length 0 to indicate no weighting (default). - `offset::Vector=similar(y,0)`: offset added to `Xβ` to form `eta`. Can be of length 0 @@ -476,7 +481,7 @@ function fit(::Type{M}, d::UnivariateDistribution, l::Link = canonicallink(d); dofit::Bool = true, - wts::AbstractVector{<:Real} = similar(y, 0), + wts::Union{AbstractWeights{<:Real}, AbstractVector{<:Real}} = aweights(similar(y, 0)), offset::AbstractVector{<:Real} = similar(y, 0), fitargs...) where {M<:AbstractGLM} @@ -537,6 +542,7 @@ function dispersion(m::AbstractGLM, sqr::Bool=false) end end + """ predict(mm::AbstractGLM, newX::AbstractMatrix; offset::FPVector=eltype(newX)[], interval::Union{Symbol,Nothing}=nothing, level::Real = 0.95, @@ -649,3 +655,19 @@ function checky(y, d::Binomial) end return nothing end + +""" + nobs(obj::LinearModel) + nobs(obj::GLM) + +For linear and generalized linear models, returns the number of rows, or, +when prior weights of type FrequencyWeights are specified, the sum of weights. +""" +nobs(obj::LinPredModel) = nobs(obj.rr) + +nobs(r::LmResp{V,W}) where {V,W} = oftype(sum(one(eltype(r.wts))), length(r.y)) +nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} = r.wts.sum + +nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = r.wts.sum +nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W} = oftype(sum(one(eltype(r.wts))), length(r.y)) + diff --git a/src/linpred.jl b/src/linpred.jl index 4274f575..553fb503 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -1,5 +1,5 @@ """ - linpred!(out, p::LinPred, f::Real=1.0) +linpred!(out, p::LinPred, f::Real=1.0) Overwrite `out` with the linear predictor from `p` with factor `f` @@ -11,14 +11,14 @@ function linpred!(out, p::LinPred, f::Real=1.) end """ - linpred(p::LinPred, f::Real=1.0) +linpred(p::LinPred, f::Real=1.0) Return the linear predictor `p.X * (p.beta0 .+ f * p.delbeta)` """ linpred(p::LinPred, f::Real=1.) = linpred!(Vector{eltype(p.X)}(undef, size(p.X, 1)), p, f) """ - installbeta!(p::LinPred, f::Real=1.0) +installbeta!(p::LinPred, f::Real=1.0) Install `pbeta0 .+= f * p.delbeta` and zero out `p.delbeta`. Return the updated `p.beta0`. """ @@ -33,7 +33,7 @@ function installbeta!(p::LinPred, f::Real=1.) end """ - DensePredQR +DensePredQR A `LinPred` type with a dense, unpivoted QR decomposition of `X` @@ -66,7 +66,7 @@ DensePredQR(X::Matrix{T}) where T = DensePredQR{T}(X, zeros(T, size(X,2))) convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X, zeros(T, size(X, 2))) """ - delbeta!(p::LinPred, r::Vector) +delbeta!(p::LinPred, r::Vector) Evaluate and return `p.delbeta` the increment to the coefficient vector from residual `r` """ @@ -78,7 +78,7 @@ function delbeta!(p::DensePredQR{T}, r::Vector{T}) where T<:BlasReal end """ - DensePredChol{T} +DensePredChol{T} A `LinPred` type with a dense Cholesky factorization of `X'X` @@ -106,12 +106,12 @@ function DensePredChol(X::AbstractMatrix, pivot::Bool) T = eltype(F) F = pivot ? pivoted_cholesky!(F, tol = -one(T), check = false) : cholesky!(F) DensePredChol(Matrix{T}(X), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - F, - similar(X, T), - similar(cholfactors(F))) + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + F, + similar(X, T), + similar(cholfactors(F))) end cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot) @@ -177,12 +177,12 @@ end function SparsePredChol(X::SparseMatrixCSC{T}) where T chol = cholesky(sparse(I, size(X, 2), size(X,2))) return SparsePredChol{eltype(X),typeof(X),typeof(chol)}(X, - X', - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - chol, - similar(X)) + X', + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + chol, + similar(X)) end cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X) @@ -220,6 +220,7 @@ function invchol(x::DensePredChol{T,<: CholeskyPivoted}) where T res[ipiv, ipiv] end invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), size(x.X, 2)) +## For ProbabilityWeights the variance is diferent vcov(x::LinPredModel) = rmul!(invchol(x.pp), dispersion(x, true)) function cor(x::LinPredModel) @@ -235,28 +236,42 @@ function show(io::IO, obj::LinPredModel) end modelframe(obj::LinPredModel) = obj.fr -modelmatrix(obj::LinPredModel) = obj.pp.X + +function modelmatrix(obj::LinPredModel; weighted=false) + wts = weights(obj) + X = obj.pp.X + if !weighted + X + elseif !isempty(wts) + wts_times_X(X, wts) + else + throw(ArgumentError("`weighted=true` allowed only for weighted models.")) + end +end + +function wts_times_X(X::AbstractSparseMatrix, wts::AbstractArray) + Z = copy(X) + rows = rowvals(Z) + vals = nonzeros(Z) + m, n = size(Z) + for j = 1:n + for i in nzrange(Z, j) + r = rows[i] + vals[i] *= sqrt(wts[r]) + end + end + return Z +end + +wts_times_X(X::AbstractMatrix, wts::AbstractArray) = sqrt.(wts).*X + response(obj::LinPredModel) = obj.rr.y fitted(m::LinPredModel) = m.rr.mu predict(mm::LinPredModel) = fitted(mm) StatsModels.formula(obj::LinPredModel) = modelframe(obj).formula -residuals(obj::LinPredModel) = residuals(obj.rr) - -""" - nobs(obj::LinearModel) - nobs(obj::GLM) - -For linear and generalized linear models, returns the number of rows, or, -when prior weights are specified, the sum of weights. -""" -function nobs(obj::LinPredModel) - if isempty(obj.rr.wts) - oftype(sum(one(eltype(obj.rr.wts))), length(obj.rr.y)) - else - sum(obj.rr.wts) - end -end +residuals(obj::LinPredModel; kwarg...) = residuals(obj.rr; kwarg...) +weights(obj::LinPredModel) = weights(obj.rr) coef(x::LinPred) = x.beta0 coef(obj::LinPredModel) = coef(obj.pp) @@ -264,3 +279,4 @@ coef(obj::LinPredModel) = coef(obj.pp) dof_residual(obj::LinPredModel) = nobs(obj) - dof(obj) + 1 hasintercept(m::LinPredModel) = any(i -> all(==(1), view(m.pp.X , :, i)), 1:size(m.pp.X, 2)) + \ No newline at end of file diff --git a/src/lm.jl b/src/lm.jl index ed11f450..994ac895 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -7,46 +7,48 @@ Encapsulates the response for a linear model - `mu`: current value of the mean response vector or fitted value - `offset`: optional offset added to the linear predictor to form `mu` -- `wts`: optional vector of prior frequency (a.k.a. case) weights for observations +- `wts`: optional weights for observations (AbstractWeights) - `y`: observed response vector Either or both `offset` and `wts` may be of length 0 """ -mutable struct LmResp{V<:FPVector} <: ModResp # response in a linear model +mutable struct LmResp{V<:FPVector, W<:Union{AbstractWeights{<:Real}, AbstractVector{<:Real}}} <: ModResp # response in a linear model mu::V # mean response offset::V # offset added to linear predictor (may have length 0) - wts::V # prior weights (may have length 0) + wts::W # prior weights (may have length 0) y::V # response - function LmResp{V}(mu::V, off::V, wts::V, y::V) where V + function LmResp{V, W}(mu::V, off::V, wts::W, y::V) where {V, W} n = length(y) length(mu) == n || error("mismatched lengths of mu and y") ll = length(off) ll == 0 || ll == n || error("length of offset is $ll, must be $n or 0") ll = length(wts) ll == 0 || ll == n || error("length of wts is $ll, must be $n or 0") - new{V}(mu, off, wts, y) + new{V,W}(mu, off, wts, y) end end -function LmResp(y::AbstractVector{<:Real}, wts::Union{Nothing,AbstractVector{<:Real}}=nothing) +function LmResp(y::AbstractVector{<:Real}, wts::Union{Nothing,AbstractVector{<:Real}, AbstractWeights{<:Real}}=nothing) # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly _y = convert(Vector{float(eltype(y))}, y) - _wts = if wts === nothing - similar(_y, 0) + _wts = if wts === nothing + aweights(similar(_y, 0)) + elseif isa(wts, Vector) + fweights(wts) else - convert(Vector{float(eltype(wts))}, wts) + wts end - return LmResp{typeof(_y)}(zero(_y), zero(_y), _wts, _y) + return LmResp{typeof(_y), typeof(_wts)}(zero(_y), zero(_y), _wts, _y) end -function updateμ!(r::LmResp{V}, linPr::V) where V<:FPVector +function updateμ!(r::LmResp{V, W}, linPr::V) where {V<:FPVector, W} n = length(linPr) length(r.y) == n || error("length(linPr) is $n, should be $(length(r.y))") length(r.offset) == 0 ? copyto!(r.mu, linPr) : broadcast!(+, r.mu, linPr, r.offset) deviance(r) end -updateμ!(r::LmResp{V}, linPr) where {V<:FPVector} = updateμ!(r, convert(V, vec(linPr))) +updateμ!(r::LmResp{V, W}, linPr) where {V<:FPVector, W} = updateμ!(r, convert(V, vec(linPr))) function deviance(r::LmResp) y = r.y @@ -97,7 +99,19 @@ function nullloglikelihood(r::LmResp) -n/2 * (log(2π * nulldeviance(r)/n) + 1) end -residuals(r::LmResp) = r.y - r.mu +function residuals(r::LmResp; weighted=false) + wts = weights(r) + res = r.y - r.mu + if !weighted + res + elseif !isempty(wts) + sqrt.(wts).*res + else + throw(ArgumentError("`weighted=true` allowed only for weighted models.")) + end +end + +weights(r::LmResp) = r.wts """ LinearModel @@ -120,7 +134,7 @@ function StatsBase.fit!(obj::LinearModel) if isempty(obj.rr.wts) delbeta!(obj.pp, obj.rr.y) else - delbeta!(obj.pp, obj.rr.y, obj.rr.wts) + delbeta!(obj.pp, obj.rr.y, convert(Vector{eltype(obj.rr.y)}, obj.rr.wts)) end installbeta!(obj.pp) updateμ!(obj.rr, linpred(obj.pp, zero(eltype(obj.rr.y)))) @@ -135,12 +149,15 @@ const FIT_LM_DOC = """ in columns (including if appropriate the intercept), and `y` must be a vector holding values of the dependent variable. - The keyword argument `wts` can be a `Vector` specifying frequency weights for observations. - Such weights are equivalent to repeating each observation a number of times equal - to its weight. Do note that this interpretation gives equal point estimates but - different standard errors from analytical (a.k.a. inverse variance) weights and - from probability (a.k.a. sampling) weights which are the default in some other - software. + The keyword argument `wts` can be an `AbstractWeights` specifying frequency weights for observations. + Weights allowed are: + - `AnalyticaWeights`: describe a non-random relative importance (usually between 0 and 1) + for each observation. + - `FrequencyWeights`: describe the number of times (or frequency) each observation was observed. + - `ProbabilityWeights`: represent the inverse of the sampling probability for each observation, + providing a correction mechanism for under- or over-sampling certain population groups + These weights gives equal point estimates but different standard errors. + If a vector is passed (deprecated), it is coerced to `FrequencyWeights`. `dropcollinear` controls whether or not `lm` accepts a model matrix which is less-than-full rank. If `true` (the default), only the first of each set of @@ -166,6 +183,9 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{< @warn "Positional argument `allowrankdeficient` is deprecated, use keyword " * "argument `dropcollinear` instead. Proceeding with positional argument value: $allowrankdeficient_dep" dropcollinear = allowrankdeficient_dep + end + if isa(wts, Vector) + Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights.", :fit) end fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear))) end @@ -206,6 +226,7 @@ nullloglikelihood(obj::LinearModel) = nullloglikelihood(obj.rr) r2(obj::LinearModel) = 1 - deviance(obj)/nulldeviance(obj) + function adjr2(obj::LinearModel) n = nobs(obj) # dof() includes the dispersion parameter @@ -299,19 +320,15 @@ of each data point. Currently only implemented for linear models without weights. """ function StatsBase.cooksdistance(obj::LinearModel) - u = residuals(obj) - mse = dispersion(obj,true) + wts = weights(obj) + u = residuals(obj; weighted=!isempty(wts)) + mse = GLM.dispersion(obj,true) k = dof(obj)-1 d_res = dof_residual(obj) - X = modelmatrix(obj) - XtX = crossmodelmatrix(obj) + X = modelmatrix(obj; weighted=!isempty(wts)) + XtX = crossmodelmatrix(obj; weighted=!isempty(wts)) k == size(X,2) || throw(ArgumentError("Models with collinear terms are not currently supported.")) - wts = obj.rr.wts - if isempty(wts) - hii = diag(X * inv(XtX) * X') - else - throw(ArgumentError("Weighted models are not currently supported.")) - end + hii = diag(X * inv(XtX) * X') D = @. u^2 * (hii / (1 - hii)^2) / (k*mse) return D end diff --git a/src/scratch.jl b/src/scratch.jl index 561ecb22..4f2612b0 100644 --- a/src/scratch.jl +++ b/src/scratch.jl @@ -1,5 +1,10 @@ using GLM using DataFrames +using Random +using CSV +using StatsBase +using RDatasets +Random.seed!(11) y = rand(10) x = rand(10,2) @@ -9,12 +14,64 @@ df.y = y df.wts = wts lm1 = lm(x,y) lmw = lm(x,y; wts = wts) -lmf = lm(@formula(y~x1+x2), df) -lmfw = lm(@formula(y~x1+x2), df; wts = wts) -glm(x, y) +lmf = lm(@formula(y~x1+x2-1), df) +lmfw = lm(@formula(y~-1+x1+x2), df; wts = aweights(wts)) +lmfw = lm(@formula(y~-1+x1+x2), df; wts = pweights(wts)) +lmfw = lm(@formula(y~-1+x1+x2), df; wts = fweights(wts)) +glm(@formula(y~-1+x1+x2), df, Normal, IdentityLink; wts = fweights(wts)) -cooksdistance(lm) +cooksdistance(lm1) +df = dataset("quantreg", "engel") +N = nrow(df) +df.weights = repeat(1:5, Int(N/5)) +f = @formula(FoodExp ~ Income) +lm_model = lm(f, df, wts = FrequencyWeights(df.weights)) +glm_model = glm(f, df, Normal(), wts = FrequencyWeights(df.weights)) +@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968]) +@test isapprox(r2(lm_model), 0.8330258148644486) +@test isapprox(adjr2(lm_model), 0.832788298242634) +@test isapprox(vcov(lm_model), [88.02760245551447 -0.06772589439264813; + -0.06772589439264813 6.670664781664879e-5]) +@test isapprox(first(predict(lm_model)), 357.57694841780994) +@test isapprox(loglikelihood(lm_model), -4353.946729075838) +@test isapprox(loglikelihood(glm_model), -4353.946729075838) +@test isapprox(nullloglikelihood(lm_model), -4984.892139711452) +@test isapprox(mean(residuals(lm_model)), -5.412966629787718) + +lm_model = lm(f, df, wts = df.weights) +glm_model = glm(f, df, Normal(), wts = df.weights) +@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968]) +@test isapprox(r2(lm_model), 0.8330258148644486) +@test isapprox(adjr2(lm_model), 0.832788298242634) +@test isapprox(vcov(lm_model), [88.02760245551447 -0.06772589439264813; + -0.06772589439264813 6.670664781664879e-5]) +@test isapprox(first(predict(lm_model)), 357.57694841780994) +@test isapprox(loglikelihood(lm_model), -4353.946729075838) +@test isapprox(loglikelihood(glm_model), -4353.946729075838) +@test isapprox(nullloglikelihood(lm_model), -4984.892139711452) +@test isapprox(mean(residuals(lm_model)), -5.412966629787718) + + + +lm_model = lm(f, df, wts = aweights(df.weights)) +glm_model = glm(f, df, Normal(), wts = aweights(df.weights)) +@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(stderror(lm_model), [16.297055281313032, 0.014186793927918842]) +@test isapprox(r2(lm_model), 0.8330258148644486) +@test isapprox(adjr2(lm_model), 0.8323091874604334) +@test isapprox(vcov(lm_model), [265.59401084217296 -0.20434035947652907; + -0.20434035947652907 0.00020126512195323495]) +@test isapprox(first(predict(lm_model)), 357.57694841780994) +@test isapprox(loglikelihood(lm_model), -4353.946729075838) +@test isapprox(loglikelihood(glm_model), -4353.946729075838) +@test isapprox(nullloglikelihood(lm_model), -4984.892139711452) +@test isapprox(mean(residuals(lm_model)), -5.412966629787718) \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index e38c10f4..90bd33a0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -83,12 +83,13 @@ end end @testset "linear model with weights" begin + df = dataset("quantreg", "engel") N = nrow(df) df.weights = repeat(1:5, Int(N/5)) f = @formula(FoodExp ~ Income) - lm_model = lm(f, df, wts = df.weights) - glm_model = glm(f, df, Normal(), wts = df.weights) + lm_model = lm(f, df, wts = FrequencyWeights(df.weights)) + glm_model = glm(f, df, Normal(), wts = FrequencyWeights(df.weights)) @test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) @test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) @test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968]) @@ -101,6 +102,29 @@ end @test isapprox(loglikelihood(glm_model), -4353.946729075838) @test isapprox(nullloglikelihood(lm_model), -4984.892139711452) @test isapprox(mean(residuals(lm_model)), -5.412966629787718) + + lm_model = lm(f, df, wts = df.weights) + glm_model = glm(f, df, Normal(), wts = df.weights) + @test isa(weights(lm_model), FrequencyWeights) + @test isa(weights(glm_model), FrequencyWeights) + + + + + lm_model = lm(f, df, wts = aweights(df.weights)) + glm_model = glm(f, df, Normal(), wts = aweights(df.weights)) + @test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) + @test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) + @test isapprox(stderror(lm_model), [16.297055281313032, 0.014186793927918842]) + @test isapprox(r2(lm_model), 0.8330258148644486) + @test isapprox(adjr2(lm_model), 0.8323091874604334) + @test isapprox(vcov(lm_model), [265.59401084217296 -0.20434035947652907; + -0.20434035947652907 0.00020126512195323495]) + @test isapprox(first(predict(lm_model)), 357.57694841780994) + @test isapprox(loglikelihood(lm_model), -4353.946729075838) + @test isapprox(loglikelihood(glm_model), -4353.946729075838) + @test isapprox(nullloglikelihood(lm_model), -4984.892139711452) + @test isapprox(mean(residuals(lm_model)), -5.412966629787718) end @testset "rankdeficient" begin @@ -128,8 +152,9 @@ end @test all(isnan, hcat(coeftable(m2p).cols[2:end]...)[7,:]) m2p_dep_pos = fit(LinearModel, Xmissingcell, ymissingcell, true) - @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " * - "argument `dropcollinear` instead. Proceeding with positional argument value: true") fit(LinearModel, Xmissingcell, ymissingcell, true) + @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " * "argument `dropcollinear` instead. Proceeding with positional argument value: true") (:warn, "Passing weights as vector is deprecated in favor of explicitely using " * "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights.") fit(LinearModel, Xmissingcell, ymissingcell, true) + # @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " * + # "argument `dropcollinear` instead. Proceeding with positional argument value: true") fit(LinearModel, Xmissingcell, ymissingcell, true) @test isa(m2p_dep_pos.pp.chol, CholeskyPivoted) @test rank(m2p_dep_pos.pp.chol) == rank(m2p.pp.chol) @test isapprox(deviance(m2p_dep_pos), deviance(m2p)) @@ -407,10 +432,10 @@ admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12], admit = repeat([false, true], inner=[4]), rank = categorical(repeat(1:4, outer=2))) -@testset "Aggregated Binomial LogitLink" begin +@testset "Aggregated Binomial LogitLink (FrequencyWeights)" begin for distr in (Binomial, Bernoulli) gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(), - wts=Array(admit_agr.count)) + wts=fweights(admit_agr.count)) @test dof(gm14) == 4 @test nobs(gm14) == 400 @test isapprox(deviance(gm14), 474.9667184280627) @@ -421,8 +446,25 @@ admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12], @test isapprox(coef(gm14), [0.164303051291, -0.7500299832, -1.36469792994, -1.68672866457], atol=1e-5) end + end +@testset "Aggregated Binomial LogitLink (AnalyticWeights)" begin + for distr in (Binomial, Bernoulli) + gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(), + wts=aweights(admit_agr.count)) + @test dof(gm14) == 4 + @test nobs(gm14) == 8 + @test isapprox(deviance(gm14), 474.9667184280627) + @test isapprox(loglikelihood(gm14), -237.48335921403134) + @test isapprox(aic(gm14), 482.96671842822883) + @test isapprox(aicc(gm14), 496.3000517613874) + @test isapprox(bic(gm14), 483.28448459477346) + @test isapprox(coef(gm14), + [0.164303051291, -0.7500299832, -1.36469792994, -1.68672866457], atol=1e-5) + end + +end # Logistic regression using aggregated data with proportions of successes and weights admit_agr2 = DataFrame(Any[[61., 151, 121, 67], [33., 54, 28, 12], categorical(1:4)], [:count, :admit, :rank]) @@ -431,7 +473,7 @@ admit_agr2.p = admit_agr2.admit ./ admit_agr2.count ## The model matrix here is singular so tests like the deviance are just round off error @testset "Binomial LogitLink aggregated" begin gm15 = fit(GeneralizedLinearModel, @formula(p ~ rank), admit_agr2, Binomial(), - wts=admit_agr2.count) + wts=fweights(admit_agr2.count)) test_show(gm15) @test dof(gm15) == 4 @test nobs(gm15) == 400 @@ -446,7 +488,7 @@ end # Weighted Gamma example (weights are totally made up) @testset "Gamma InverseLink Weights" begin gm16 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(), - wts=[1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) + wts=fweights([1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7])) test_show(gm16) @test dof(gm16) == 3 @test nobs(gm16) == 32.7 @@ -461,7 +503,7 @@ end # Weighted Poisson example (weights are totally made up) @testset "Poisson LogLink Weights" begin gm17 = fit(GeneralizedLinearModel, @formula(Counts ~ Outcome + Treatment), dobson, Poisson(), - wts = [1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) + wts = fweights([1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7])) test_show(gm17) @test dof(gm17) == 5 @test isapprox(deviance(gm17), 17.699857821414266) @@ -618,6 +660,37 @@ end end end +@testset "Sparse LM (weighted)" begin + rng = StableRNG(1) + X = sprand(rng, 1000, 10, 0.01) + β = randn(rng, 10) + y = Bool[rand(rng) < logistic(x) for x in X * β] + wts = rand(1000) + gmsparsev = [fit(LinearModel, X, y; wts=fweights(wts)), + fit(LinearModel, X, sparse(y); wts=fweights(wts)), + fit(LinearModel, Matrix(X), sparse(y); wts=fweights(wts))] + gmdense = fit(LinearModel, Matrix(X), y; wts=fweights(wts)) + + for gmsparse in gmsparsev + @test isapprox(deviance(gmsparse), deviance(gmdense)) + @test isapprox(coef(gmsparse), coef(gmdense)) + @test isapprox(vcov(gmsparse), vcov(gmdense)) + @test isapprox(Matrix(modelmatrix(gmsparse; weighted=true)), modelmatrix(gmdense; weighted=true)) + end + + gmsparsev = [fit(LinearModel, X, y; wts=aweights(wts)), + fit(LinearModel, X, sparse(y); wts=aweights(wts)), + fit(LinearModel, Matrix(X), sparse(y); wts=aweights(wts))] + gmdense = fit(LinearModel, Matrix(X), y; wts=aweights(wts)) + + for gmsparse in gmsparsev + @test isapprox(deviance(gmsparse), deviance(gmdense)) + @test isapprox(coef(gmsparse), coef(gmdense)) + @test isapprox(vcov(gmsparse), vcov(gmdense)) + @test isapprox(Matrix(modelmatrix(gmsparse; weighted=true)), modelmatrix(gmdense; weighted=true)) + end +end + @testset "Predict" begin rng = StableRNG(123) X = rand(rng, 10, 2) From 12121a31b2a3642f065f2e0e152a2de3ef8bd30a Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 15 Jun 2022 19:20:30 +0200 Subject: [PATCH 3/8] WIP --- src/glmfit.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 942ffc18..e9ad3f6c 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -666,8 +666,8 @@ when prior weights of type FrequencyWeights are specified, the sum of weights. nobs(obj::LinPredModel) = nobs(obj.rr) nobs(r::LmResp{V,W}) where {V,W} = oftype(sum(one(eltype(r.wts))), length(r.y)) -nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} = r.wts.sum +nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} = isempty(r.wts) ? oftype(sum(one(eltype(r.wts))), length(r.y)) : r.wts.sum -nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = r.wts.sum +nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = isempty(r.wts) ? oftype(sum(one(eltype(r.wts))), length(r.y)) : r.wts.sum nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W} = oftype(sum(one(eltype(r.wts))), length(r.y)) From 4363ba49364a080d3e388698cf50f6a7d8f995fb Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Fri, 17 Jun 2022 15:48:42 +0200 Subject: [PATCH 4/8] Taking weights seriously --- src/linpred.jl | 104 +++++++++++++++++++++++++------------------------ src/lm.jl | 23 ++++++----- 2 files changed, 67 insertions(+), 60 deletions(-) diff --git a/src/linpred.jl b/src/linpred.jl index 553fb503..972489e1 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -45,24 +45,33 @@ A `LinPred` type with a dense, unpivoted QR decomposition of `X` - `scratchbeta`: scratch vector of length `p`, used in `linpred!` method - `qr`: a `QRCompactWY` object created from `X`, with optional row weights. """ -mutable struct DensePredQR{T<:BlasReal} <: DensePred +mutable struct DensePredQR{T<:BlasReal, W<:AbstractVector{<:Real}} <: DensePred X::Matrix{T} # model matrix + Xw::Matrix{T} # weighted model matrix beta0::Vector{T} # base coefficient vector delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} qr::QRCompactWY{T} - function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}) where T + wts::W + function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}, wts::W) where {T,W<:AbstractWeights{<:Real}} n, p = size(X) length(beta0) == p || throw(DimensionMismatch("length(β0) ≠ size(X,2)")) - new{T}(X, beta0, zeros(T,p), zeros(T,p), qr(X)) + (length(wts) == n || isempty(wts)) || throw(DimensionMismatch("Lenght of weights does not match the dimension of X")) + Xw = isempty(_wt) ? Matrix{T}(undef, 0, 0) : sqrt.(wts).*X + qrX = isempty(_wts) ? qr(X) : qr(Xw) + new{T,W}(X, Xw, beta0, zeros(T,p), zeros(T,p), qrX, wts) end - function DensePredQR{T}(X::Matrix{T}) where T + function DensePredQR{T}(X::Matrix{T}, wts::W) where {T,W} n, p = size(X) - new{T}(X, zeros(T, p), zeros(T,p), zeros(T,p), qr(X)) + DensePredQR(X, zeros(T, p), wts) + end + function DensePredQR(X::Matrix{T}) where T + n, p = size(X) + DensePredQR{T}(X, zeros(T, p), uweights(0)) end end -DensePredQR(X::Matrix, beta0::Vector) = DensePredQR{eltype(X)}(X, beta0) -DensePredQR(X::Matrix{T}) where T = DensePredQR{T}(X, zeros(T, size(X,2))) +DensePredQR(X::Matrix, beta0::Vector, wts::AbstractVector) = DensePredQR{eltype(X)}(X, beta0, wts) +DensePredQR(X::Matrix{T}, wts::AbstractVector) where T = DensePredQR{T}(X, zeros(T, size(X,2)), wts) convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X, zeros(T, size(X, 2))) """ @@ -92,29 +101,34 @@ A `LinPred` type with a dense Cholesky factorization of `X'X` - `scratchm1`: scratch Matrix{T} of the same size as `X` - `scratchm2`: scratch Matrix{T} os the same size as `X'X` """ -mutable struct DensePredChol{T<:BlasReal,C} <: DensePred +mutable struct DensePredChol{T<:BlasReal,W<:AbstractVector{<:Real},C} <: DensePred X::Matrix{T} # model matrix + Xw::Matrix{T} # weighted model matrix beta0::Vector{T} # base vector for coefficients delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} - chol::C + wts::W + chol::C scratchm1::Matrix{T} scratchm2::Matrix{T} end -function DensePredChol(X::AbstractMatrix, pivot::Bool) - F = Hermitian(float(X'X)) +function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Real}=uweights(0)) + Xw = isempty(wts) ? Matrix{eltype(X)}(undef, 0, 0) : sqrt.(wts).*X + F = isempty(wts) ? Hermitian(float(X'X)) : Hermitian(float(Xw'Xw)) T = eltype(F) F = pivot ? pivoted_cholesky!(F, tol = -one(T), check = false) : cholesky!(F) DensePredChol(Matrix{T}(X), + Matrix{T}(Xw), zeros(T, size(X, 2)), zeros(T, size(X, 2)), zeros(T, size(X, 2)), + wts, F, similar(X, T), similar(cholfactors(F))) end -cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot) +cholpred(X::AbstractMatrix, pivot::Bool, wts) = DensePredChol(X, pivot, wts) cholfactors(c::Union{Cholesky,CholeskyPivoted}) = c.factors cholesky!(p::DensePredChol{T}) where {T<:FP} = p.chol @@ -131,9 +145,10 @@ function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}) where T<:BlasRea p end -function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal ch = p.chol - delbeta = mul!(p.delbeta, adjoint(p.X), r) + Z = isempty(p.wts) ? p.X : p.Xw + delbeta = mul!(p.delbeta, adjoint(Z), r) rnk = rank(ch) if rnk == length(delbeta) ldiv!(ch, delbeta) @@ -148,36 +163,43 @@ function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}) where T<: p end -function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal - scr = mul!(p.scratchm1, Diagonal(wt), p.X) - cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(scr), p.X), :U)) +function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:Cholesky}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal + Z = isempty(p.wts) ? X : Xw + scr = mul!(p.scratchm1, Diagonal(wt), Z) + cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(scr), Z), :U)) mul!(p.delbeta, transpose(scr), r) ldiv!(p.chol, p.delbeta) p end -function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal + Z = isempty(p.wts) ? p.X : p.Xw cf = cholfactors(p.chol) piv = p.chol.p - cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), p.X)), p.X)[piv, piv] + cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), Z)), Z)[piv, piv] cholesky!(Hermitian(cf, Symbol(p.chol.uplo))) ldiv!(p.chol, mul!(p.delbeta, transpose(p.scratchm1), r)) p end -mutable struct SparsePredChol{T,M<:SparseMatrixCSC,C} <: GLM.LinPred +mutable struct SparsePredChol{T,W<:AbstractWeights{<:Real},M<:SparseMatrixCSC,C} <: GLM.LinPred X::M # model matrix + Xw::M # weighted model matrix Xt::M # X' beta0::Vector{T} # base vector for coefficients delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} + wts::W chol::C scratch::M end -function SparsePredChol(X::SparseMatrixCSC{T}) where T +function SparsePredChol(X::SparseMatrixCSC{T}, wts::AbstractVector) where T chol = cholesky(sparse(I, size(X, 2), size(X,2))) + sqrtwts = sqrt.(wts) + Xw = isempty(wts) ? SparseMatrixCSC(I, 0, 0) : sqrtwts.*X return SparsePredChol{eltype(X),typeof(X),typeof(chol)}(X, - X', + Xw, + isempty(wts) ? X' : Xw', zeros(T, size(X, 2)), zeros(T, size(X, 2)), zeros(T, size(X, 2)), @@ -185,13 +207,14 @@ function SparsePredChol(X::SparseMatrixCSC{T}) where T similar(X)) end -cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X) +cholpred(X::SparseMatrixCSC, pivot::Bool=false, wts::AbstractVector=uweights(0)) = SparsePredChol(X, wts) function delbeta!(p::SparsePredChol{T}, r::Vector{T}, wt::Vector{T}) where T - scr = mul!(p.scratch, Diagonal(wt), p.X) - XtWX = p.Xt*scr + Z = isempty(p.wts) ? X : Xw + #scr = mul!(p.scratch, Diagonal(wt), Z) + XtWX = p.Xt*Z c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) - p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) + p.delbeta = c \ mul!(p.delbeta, adjoint(Z), r) end function delbeta!(p::SparsePredChol{T}, r::Vector{T}) where T @@ -237,34 +260,16 @@ end modelframe(obj::LinPredModel) = obj.fr -function modelmatrix(obj::LinPredModel; weighted=false) - wts = weights(obj) - X = obj.pp.X - if !weighted - X - elseif !isempty(wts) - wts_times_X(X, wts) +function modelmatrix(obj::LinPredModel; weighted=false) + if !weighted + obj.pp.X + elseif !isempty(weights(obj)) + obj.pp.Xw else throw(ArgumentError("`weighted=true` allowed only for weighted models.")) end end -function wts_times_X(X::AbstractSparseMatrix, wts::AbstractArray) - Z = copy(X) - rows = rowvals(Z) - vals = nonzeros(Z) - m, n = size(Z) - for j = 1:n - for i in nzrange(Z, j) - r = rows[i] - vals[i] *= sqrt(wts[r]) - end - end - return Z -end - -wts_times_X(X::AbstractMatrix, wts::AbstractArray) = sqrt.(wts).*X - response(obj::LinPredModel) = obj.rr.y fitted(m::LinPredModel) = m.rr.mu @@ -279,4 +284,3 @@ coef(obj::LinPredModel) = coef(obj.pp) dof_residual(obj::LinPredModel) = nobs(obj) - dof(obj) + 1 hasintercept(m::LinPredModel) = any(i -> all(==(1), view(m.pp.X , :, i)), 1:size(m.pp.X, 2)) - \ No newline at end of file diff --git a/src/lm.jl b/src/lm.jl index 994ac895..89ba41c8 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -30,15 +30,8 @@ end function LmResp(y::AbstractVector{<:Real}, wts::Union{Nothing,AbstractVector{<:Real}, AbstractWeights{<:Real}}=nothing) # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly - _y = convert(Vector{float(eltype(y))}, y) - _wts = if wts === nothing - aweights(similar(_y, 0)) - elseif isa(wts, Vector) - fweights(wts) - else - wts - end - return LmResp{typeof(_y), typeof(_wts)}(zero(_y), zero(_y), _wts, _y) + _y = convert(Vector{float(eltype(y))}, y) + return LmResp{typeof(_y), typeof(wts)}(zero(_y), zero(_y), wts, _y) end function updateμ!(r::LmResp{V, W}, linPr::V) where {V<:FPVector, W} @@ -187,7 +180,17 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{< if isa(wts, Vector) Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights.", :fit) end - fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear))) + _wts = if wts === nothing + uweights(0) + elseif isa(wts, AbstractWeights) + wts + elseif isa(wts, AbstractVector) + fweights(wts) + else + throw(ArgumentError("`wts` should be an AbstractVector coercible to an AbstractWeights")) + end + + fit!(LinearModel(LmResp(y, _wts), cholpred(X, dropcollinear, _wts))) end """ From ca702dcda769bf137ebcf1b86a9c0501cc75e49d Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Sat, 18 Jun 2022 13:07:07 +0200 Subject: [PATCH 5/8] WIP --- src/glmfit.jl | 4 ++-- src/linpred.jl | 52 ++++++++++++++++++++++++++++++++++++++------------ src/lm.jl | 12 +++--------- 3 files changed, 45 insertions(+), 23 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index e9ad3f6c..760dcb1e 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -481,7 +481,7 @@ function fit(::Type{M}, d::UnivariateDistribution, l::Link = canonicallink(d); dofit::Bool = true, - wts::Union{AbstractWeights{<:Real}, AbstractVector{<:Real}} = aweights(similar(y, 0)), + wts::AbstractWeights{<:Real}, offset::AbstractVector{<:Real} = similar(y, 0), fitargs...) where {M<:AbstractGLM} @@ -491,7 +491,7 @@ function fit(::Type{M}, end rr = GlmResp(y, d, l, offset, wts) - res = M(rr, cholpred(X), false) + res = M(rr, cholpred(X, false, wts), false) return dofit ? fit!(res; fitargs...) : res end diff --git a/src/linpred.jl b/src/linpred.jl index 972489e1..b74d4b78 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -112,7 +112,7 @@ mutable struct DensePredChol{T<:BlasReal,W<:AbstractVector{<:Real},C} <: DensePr scratchm1::Matrix{T} scratchm2::Matrix{T} end -function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Real}=uweights(0)) +function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Real}) Xw = isempty(wts) ? Matrix{eltype(X)}(undef, 0, 0) : sqrt.(wts).*X F = isempty(wts) ? Hermitian(float(X'X)) : Hermitian(float(Xw'Xw)) T = eltype(F) @@ -128,7 +128,8 @@ function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Re similar(cholfactors(F))) end -cholpred(X::AbstractMatrix, pivot::Bool, wts) = DensePredChol(X, pivot, wts) +cholpred(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) = DensePredChol(X, pivot, wts) +cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot, uweights(0)) cholfactors(c::Union{Cholesky,CholeskyPivoted}) = c.factors cholesky!(p::DensePredChol{T}) where {T<:FP} = p.chol @@ -163,22 +164,21 @@ function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Ve p end -function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:Cholesky}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal - Z = isempty(p.wts) ? X : Xw - scr = mul!(p.scratchm1, Diagonal(wt), Z) - cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(scr), Z), :U)) - mul!(p.delbeta, transpose(scr), r) +function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:Cholesky}, r::Vector{T}) where T<:BlasReal + Z = isempty(p.wts) ? p.X : p.Xw + cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(Z), Z), :U)) + mul!(p.delbeta, transpose(Z), r) ldiv!(p.chol, p.delbeta) p end -function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal Z = isempty(p.wts) ? p.X : p.Xw cf = cholfactors(p.chol) piv = p.chol.p - cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), Z)), Z)[piv, piv] + cf .= mul!(p.scratchm2, adjoint(Z), Z)[piv, piv] cholesky!(Hermitian(cf, Symbol(p.chol.uplo))) - ldiv!(p.chol, mul!(p.delbeta, transpose(p.scratchm1), r)) + ldiv!(p.chol, mul!(p.delbeta, transpose(Z), r)) p end @@ -243,8 +243,36 @@ function invchol(x::DensePredChol{T,<: CholeskyPivoted}) where T res[ipiv, ipiv] end invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), size(x.X, 2)) -## For ProbabilityWeights the variance is diferent -vcov(x::LinPredModel) = rmul!(invchol(x.pp), dispersion(x, true)) + +function vcov(x::LinPredModel) + d = dispersion(x, true) + B = _covm(x.pp) + rmul!(B, dispersion(x, true)) +end + +_covm(pp::DensePredChol{T, W}) where {T,W} = invchol(pp) + +function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}) where {T} + wts = pp.wts + Z = pp.scratchm1 .= pp.X.*wts + XtW2X = Z'Z + invXtWX = invchol(pp) + invXtWX*XtW2X*invXtWX +end + +function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:CholeskyPivoted}) where {T} + wts = pp.wts + Z = pp.scratchm1 .= pp.X.*wts + rnk = rank(pp.chol) + p = length(pp.delbeta) + if rnk == p + XtW2X = Z'Z + else + ## no idea + end + invXtWX = invchol(pp) + invXtWX*XtW2X*invXtWX +end function cor(x::LinPredModel) Σ = vcov(x) diff --git a/src/lm.jl b/src/lm.jl index 89ba41c8..32c6a6d7 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -124,11 +124,7 @@ end LinearAlgebra.cholesky(x::LinearModel) = cholesky(x.pp) function StatsBase.fit!(obj::LinearModel) - if isempty(obj.rr.wts) - delbeta!(obj.pp, obj.rr.y) - else - delbeta!(obj.pp, obj.rr.y, convert(Vector{eltype(obj.rr.y)}, obj.rr.wts)) - end + delbeta!(obj.pp, obj.rr.y) installbeta!(obj.pp) updateμ!(obj.rr, linpred(obj.pp, zero(eltype(obj.rr.y)))) return obj @@ -177,17 +173,15 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{< "argument `dropcollinear` instead. Proceeding with positional argument value: $allowrankdeficient_dep" dropcollinear = allowrankdeficient_dep end - if isa(wts, Vector) - Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights.", :fit) - end _wts = if wts === nothing uweights(0) elseif isa(wts, AbstractWeights) wts elseif isa(wts, AbstractVector) + Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights", :fit) fweights(wts) else - throw(ArgumentError("`wts` should be an AbstractVector coercible to an AbstractWeights")) + throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights")) end fit!(LinearModel(LmResp(y, _wts), cholpred(X, dropcollinear, _wts))) From e2b2d1223220bd9641a10e39422314a58de67527 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Tue, 21 Jun 2022 17:55:01 +0200 Subject: [PATCH 6/8] Taking weights seriously --- src/glmfit.jl | 46 +++++++++--------- src/linpred.jl | 118 ++++++++++++++++++++++++++++++++--------------- src/lm.jl | 21 +++++---- test/runtests.jl | 31 ++++++++----- 4 files changed, 135 insertions(+), 81 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 760dcb1e..277c82ad 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -41,8 +41,8 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVec end # Lengths of wts and off can be either n or 0 - if lw != 0 && lw != n - throw(DimensionMismatch("wts must have length $n or length 0 but was $lw")) + if lw != n + throw(DimensionMismatch("wts must have length $n but was $lw")) end if lo != 0 && lo != n throw(DimensionMismatch("offset must have length $n or length 0 but was $lo")) @@ -51,29 +51,19 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVec return GlmResp{V,D,L,W}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y)) end -function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::AbstractVector{<:Real}) +function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::AbstractWeights{<:Real}) # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly _y = convert(Vector{float(eltype(y))}, y) _off = convert(Vector{float(eltype(off))}, off) - _wts = if wts === nothing - ## This should be removed - here for allowing - ## passing a vector (deprecated) - aweights(similar(_y, 0)) - elseif isa(wts, AbstractWeights) - wts - elseif isa(wts, AbstractVector) - ## for backward compatibility - fweights(wts) - end η = similar(_y) μ = similar(_y) - r = GlmResp(_y, d, l, η, μ, _off, _wts) - initialeta!(r.eta, d, l, _y, _wts, _off) + r = GlmResp(_y, d, l, η, μ, _off, wts) + initialeta!(r.eta, d, l, _y, wts, _off) updateμ!(r, r.eta) return r end -function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, wts::AbstractVector{<:Real}) where {D, L} +function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, wts::AbstractWeights{<:Real}) where {D, L} GlmResp(float(y), d, l, float(off), wts) end @@ -296,7 +286,7 @@ function _fit!(m::AbstractGLM, verbose::Bool, maxiter::Integer, minstepfac::Real lp = r.mu # Initialize β, μ, and compute deviance - if start == nothing || isempty(start) + if start === nothing || isempty(start) # Compute beta update based on default response value # if no starting values have been passed delbeta!(p, wrkresp(r), r.wrkwt) @@ -481,17 +471,25 @@ function fit(::Type{M}, d::UnivariateDistribution, l::Link = canonicallink(d); dofit::Bool = true, - wts::AbstractWeights{<:Real}, - offset::AbstractVector{<:Real} = similar(y, 0), + wts::AbstractVector{<:Real} = uweights(length(y)), + offset::AbstractVector{<:Real} = similar(y, 0), fitargs...) where {M<:AbstractGLM} - + println("got you") # Check that X and y have the same number of observations if size(X, 1) != size(y, 1) throw(DimensionMismatch("number of rows in X and y must match")) end - - rr = GlmResp(y, d, l, offset, wts) - res = M(rr, cholpred(X, false, wts), false) + # For backward compatibility accept wts as AbstractArray and coerce them to FrequencyWeights + _wts = if isa(wts, AbstractWeights) + wts + elseif isa(wts, AbstractVector) + Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding by coercing wts to `FrequencyWeights`", :fit) + fweights(wts) + else + throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights")) + end + rr = GlmResp(y, d, l, offset, _wts) + res = M(rr, cholpred(X, false, _wts), false) return dofit ? fit!(res; fitargs...) : res end @@ -500,7 +498,7 @@ fit(::Type{M}, y::AbstractVector, d::UnivariateDistribution, l::Link=canonicallink(d); kwargs...) where {M<:AbstractGLM} = - fit(M, float(X), float(y), d, l; kwargs...) + fit(M, float(X), float(y), d, l; kwargs...) """ glm(formula, data, diff --git a/src/linpred.jl b/src/linpred.jl index b74d4b78..3cd46761 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -45,7 +45,7 @@ A `LinPred` type with a dense, unpivoted QR decomposition of `X` - `scratchbeta`: scratch vector of length `p`, used in `linpred!` method - `qr`: a `QRCompactWY` object created from `X`, with optional row weights. """ -mutable struct DensePredQR{T<:BlasReal, W<:AbstractVector{<:Real}} <: DensePred +mutable struct DensePredQR{T<:BlasReal, W<:AbstractWeights{<:Real}} <: DensePred X::Matrix{T} # model matrix Xw::Matrix{T} # weighted model matrix beta0::Vector{T} # base coefficient vector @@ -53,13 +53,14 @@ mutable struct DensePredQR{T<:BlasReal, W<:AbstractVector{<:Real}} <: DensePred scratchbeta::Vector{T} qr::QRCompactWY{T} wts::W + wresponse::Vector{T} function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}, wts::W) where {T,W<:AbstractWeights{<:Real}} n, p = size(X) length(beta0) == p || throw(DimensionMismatch("length(β0) ≠ size(X,2)")) (length(wts) == n || isempty(wts)) || throw(DimensionMismatch("Lenght of weights does not match the dimension of X")) - Xw = isempty(_wt) ? Matrix{T}(undef, 0, 0) : sqrt.(wts).*X - qrX = isempty(_wts) ? qr(X) : qr(Xw) - new{T,W}(X, Xw, beta0, zeros(T,p), zeros(T,p), qrX, wts) + Xw = wts isa UnitWeights ? Matrix{T}(undef, 0, 0) : sqrt.(wts).*X + qrX = wts isa UnitWeights ? qr(X) : qr(Xw) + new{T,W}(X, Xw, beta0, zeros(T,p), zeros(T,p), qrX, wts, similar(X, T, (size(X,1),) )) end function DensePredQR{T}(X::Matrix{T}, wts::W) where {T,W} n, p = size(X) @@ -70,9 +71,10 @@ mutable struct DensePredQR{T<:BlasReal, W<:AbstractVector{<:Real}} <: DensePred DensePredQR{T}(X, zeros(T, p), uweights(0)) end end +DensePredQR{T}(X::Matrix) where T = DensePredQR{eltype(X)}(X, zeros(T, size(X, 2)), uweights(size(X,1))) DensePredQR(X::Matrix, beta0::Vector, wts::AbstractVector) = DensePredQR{eltype(X)}(X, beta0, wts) DensePredQR(X::Matrix{T}, wts::AbstractVector) where T = DensePredQR{T}(X, zeros(T, size(X,2)), wts) -convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X, zeros(T, size(X, 2))) +convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X) """ delbeta!(p::LinPred, r::Vector) @@ -99,18 +101,20 @@ A `LinPred` type with a dense Cholesky factorization of `X'X` - `scratchbeta`: scratch vector of length `p`, used in `linpred!` method - `chol`: a `Cholesky` object created from `X'X`, possibly using row weights. - `scratchm1`: scratch Matrix{T} of the same size as `X` -- `scratchm2`: scratch Matrix{T} os the same size as `X'X` +- `scratchm2`: scratch Matrix{T} of the same size as `X'X` +- `scratchv1`: scratch Vector{T} of the same size of `y` """ -mutable struct DensePredChol{T<:BlasReal,W<:AbstractVector{<:Real},C} <: DensePred +mutable struct DensePredChol{T<:BlasReal,C,W<:AbstractVector{<:Real}} <: DensePred X::Matrix{T} # model matrix Xw::Matrix{T} # weighted model matrix beta0::Vector{T} # base vector for coefficients delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} + chol::C wts::W - chol::C scratchm1::Matrix{T} scratchm2::Matrix{T} + scratchv1::Vector{T} end function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Real}) Xw = isempty(wts) ? Matrix{eltype(X)}(undef, 0, 0) : sqrt.(wts).*X @@ -122,14 +126,16 @@ function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Re zeros(T, size(X, 2)), zeros(T, size(X, 2)), zeros(T, size(X, 2)), - wts, F, + wts, similar(X, T), - similar(cholfactors(F))) + similar(cholfactors(F)), + similar(X, T, (size(X,1),)) + ) end cholpred(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) = DensePredChol(X, pivot, wts) -cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot, uweights(0)) +cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot, uweights(size(X,1))) cholfactors(c::Union{Cholesky,CholeskyPivoted}) = c.factors cholesky!(p::DensePredChol{T}) where {T<:FP} = p.chol @@ -141,15 +147,37 @@ function cholesky(p::DensePredChol{T}) where T<:FP end cholesky!(p::DensePredQR{T}) where {T<:FP} = Cholesky{T,typeof(p.X)}(p.qr.R, 'U', 0) -function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:Cholesky, <:UnitWeights}, r::Vector{T}) where T<:BlasReal ldiv!(p.chol, mul!(p.delbeta, transpose(p.X), r)) +end + +function delbeta!(p::DensePredChol{T,<:Cholesky, <:AbstractWeights}, r::Vector{T}) where T<:BlasReal + p.scratchv1 .= r.*sqrt(p.wts) + ldiv!(p.chol, mul!(p.delbeta, transpose(p.Xw), p.scratchv1)) +end + +function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:UnitWeights}, r::Vector{T}) where T<:BlasReal + ch = p.chol + delbeta = mul!(p.delbeta, adjoint(p.X), r) + rnk = rank(ch) + if rnk == length(delbeta) + ldiv!(ch, delbeta) + else + permute!(delbeta, ch.p) + for k=(rnk+1):length(delbeta) + delbeta[k] = -zero(T) + end + LAPACK.potrs!(ch.uplo, view(ch.factors, 1:rnk, 1:rnk), view(delbeta, 1:rnk)) + invpermute!(delbeta, ch.p) + end p end -function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal ch = p.chol - Z = isempty(p.wts) ? p.X : p.Xw - delbeta = mul!(p.delbeta, adjoint(Z), r) + Z = p.Xw + p.scratchv1 .= r.*sqrt.(p.wts) + delbeta = mul!(p.delbeta, adjoint(p.Xw), p.scratchv1) rnk = rank(ch) if rnk == length(delbeta) ldiv!(ch, delbeta) @@ -164,66 +192,83 @@ function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Ve p end -function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:Cholesky}, r::Vector{T}) where T<:BlasReal - Z = isempty(p.wts) ? p.X : p.Xw - cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(Z), Z), :U)) - mul!(p.delbeta, transpose(Z), r) +function delbeta!(p::DensePredChol{T,<:Cholesky,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal + p.scratchm1 .= wt.*p.X + cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(p.scratchm1), p.X), :U)) + mul!(p.delbeta, transpose(p.scratchm1), r) ldiv!(p.chol, p.delbeta) p end -function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal - Z = isempty(p.wts) ? p.X : p.Xw +function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal cf = cholfactors(p.chol) piv = p.chol.p - cf .= mul!(p.scratchm2, adjoint(Z), Z)[piv, piv] + p.scratchm1 .= wt.*p.X + cf .= mul!(p.scratchm2, adjoint(p.scratchm1), p.X)[piv, piv] cholesky!(Hermitian(cf, Symbol(p.chol.uplo))) - ldiv!(p.chol, mul!(p.delbeta, transpose(Z), r)) + ldiv!(p.chol, mul!(p.delbeta, transpose(p.scratchm1), r)) p end -mutable struct SparsePredChol{T,W<:AbstractWeights{<:Real},M<:SparseMatrixCSC,C} <: GLM.LinPred +mutable struct SparsePredChol{T,M<:SparseMatrixCSC,C,W<:AbstractWeights{<:Real}} <: GLM.LinPred X::M # model matrix Xw::M # weighted model matrix Xt::M # X' beta0::Vector{T} # base vector for coefficients delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} - wts::W chol::C scratch::M + wts::W end + function SparsePredChol(X::SparseMatrixCSC{T}, wts::AbstractVector) where T chol = cholesky(sparse(I, size(X, 2), size(X,2))) sqrtwts = sqrt.(wts) Xw = isempty(wts) ? SparseMatrixCSC(I, 0, 0) : sqrtwts.*X - return SparsePredChol{eltype(X),typeof(X),typeof(chol)}(X, + return SparsePredChol{eltype(X),typeof(X),typeof(chol), typeof(wts)}(X, Xw, - isempty(wts) ? X' : Xw', + X', zeros(T, size(X, 2)), zeros(T, size(X, 2)), zeros(T, size(X, 2)), chol, - similar(X)) + similar(X), + wts) end -cholpred(X::SparseMatrixCSC, pivot::Bool=false, wts::AbstractVector=uweights(0)) = SparsePredChol(X, wts) +cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X, uweights(size(X,1))) +cholpred(X::SparseMatrixCSC, pivot::Bool, wts::AbstractWeights) = SparsePredChol(X, wts) -function delbeta!(p::SparsePredChol{T}, r::Vector{T}, wt::Vector{T}) where T - Z = isempty(p.wts) ? X : Xw - #scr = mul!(p.scratch, Diagonal(wt), Z) - XtWX = p.Xt*Z +function delbeta!(p::SparsePredChol{T,M,C,<:UnitWeights}, r::Vector{T}, wt::Vector{T}) where {T,M,C} + scr = mul!(p.scratch, Diagonal(wt), p.X) + XtWX = p.Xt*scr c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) - p.delbeta = c \ mul!(p.delbeta, adjoint(Z), r) + p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) end -function delbeta!(p::SparsePredChol{T}, r::Vector{T}) where T +function delbeta!(p::SparsePredChol{T,M,C,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where {T,M,C} + scr = mul!(p.scratch, Diagonal(wt.*p.wts), p.X) + XtWX = p.Xt*scr + c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) + p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) +end + +function delbeta!(p::SparsePredChol{T,M,C,<:UnitWeights}, r::Vector{T}) where {T,M,C} scr = p.scratch = p.X XtWX = p.Xt*scr c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) end +function delbeta!(p::SparsePredChol{T,M,C,<:AbstractWeights}, r::Vector{T}) where {T,M,C} + scr = p.scratch .= p.X.*p.wts + XtWX = p.Xt*scr + @show XtWX + c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) + p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) +end + LinearAlgebra.cholesky(p::SparsePredChol{T}) where {T} = copy(p.chol) LinearAlgebra.cholesky!(p::SparsePredChol{T}) where {T} = p.chol @@ -242,6 +287,7 @@ function invchol(x::DensePredChol{T,<: CholeskyPivoted}) where T ipiv = invperm(ch.p) res[ipiv, ipiv] end + invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), size(x.X, 2)) function vcov(x::LinPredModel) @@ -250,7 +296,7 @@ function vcov(x::LinPredModel) rmul!(B, dispersion(x, true)) end -_covm(pp::DensePredChol{T, W}) where {T,W} = invchol(pp) +_covm(pp::LinPred) = invchol(pp) function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}) where {T} wts = pp.wts diff --git a/src/lm.jl b/src/lm.jl index 32c6a6d7..9d742958 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -12,7 +12,7 @@ Encapsulates the response for a linear model Either or both `offset` and `wts` may be of length 0 """ -mutable struct LmResp{V<:FPVector, W<:Union{AbstractWeights{<:Real}, AbstractVector{<:Real}}} <: ModResp # response in a linear model +mutable struct LmResp{V<:FPVector, W<:AbstractWeights{<:Real}} <: ModResp # response in a linear model mu::V # mean response offset::V # offset added to linear predictor (may have length 0) wts::W # prior weights (may have length 0) @@ -28,9 +28,9 @@ mutable struct LmResp{V<:FPVector, W<:Union{AbstractWeights{<:Real}, AbstractVec end end -function LmResp(y::AbstractVector{<:Real}, wts::Union{Nothing,AbstractVector{<:Real}, AbstractWeights{<:Real}}=nothing) +function LmResp(y::AbstractVector{<:Real}, wts::AbstractWeights{<:Real}) # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly - _y = convert(Vector{float(eltype(y))}, y) + _y = convert(Vector{float(eltype(y))}, y) return LmResp{typeof(_y), typeof(wts)}(zero(_y), zero(_y), wts, _y) end @@ -124,8 +124,8 @@ end LinearAlgebra.cholesky(x::LinearModel) = cholesky(x.pp) function StatsBase.fit!(obj::LinearModel) - delbeta!(obj.pp, obj.rr.y) - installbeta!(obj.pp) + delbeta!(obj.pp, obj.rr.y) + installbeta!(obj.pp) updateμ!(obj.rr, linpred(obj.pp, zero(eltype(obj.rr.y)))) return obj end @@ -166,19 +166,20 @@ $FIT_LM_DOC """ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}, allowrankdeficient_dep::Union{Bool,Nothing}=nothing; - wts::AbstractVector{<:Real}=similar(y, 0), + wts::AbstractVector{<:Real}=uweights(length(y)), dropcollinear::Bool=true) if allowrankdeficient_dep !== nothing @warn "Positional argument `allowrankdeficient` is deprecated, use keyword " * "argument `dropcollinear` instead. Proceeding with positional argument value: $allowrankdeficient_dep" dropcollinear = allowrankdeficient_dep end - _wts = if wts === nothing - uweights(0) - elseif isa(wts, AbstractWeights) + # For backward compatibility accept wts as AbstractArray and coerce them to FrequencyWeights + _wts = if isa(wts, AbstractWeights) wts elseif isa(wts, AbstractVector) - Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights", :fit) + Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " * + "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * + "by coercing wts to `FrequencyWeights`", :fit) fweights(wts) else throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights")) diff --git a/test/runtests.jl b/test/runtests.jl index 90bd33a0..45663c74 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -152,9 +152,8 @@ end @test all(isnan, hcat(coeftable(m2p).cols[2:end]...)[7,:]) m2p_dep_pos = fit(LinearModel, Xmissingcell, ymissingcell, true) - @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " * "argument `dropcollinear` instead. Proceeding with positional argument value: true") (:warn, "Passing weights as vector is deprecated in favor of explicitely using " * "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights.") fit(LinearModel, Xmissingcell, ymissingcell, true) - # @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " * - # "argument `dropcollinear` instead. Proceeding with positional argument value: true") fit(LinearModel, Xmissingcell, ymissingcell, true) + @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " * + "argument `dropcollinear` instead. Proceeding with positional argument value: true") fit(LinearModel, Xmissingcell, ymissingcell, true) @test isa(m2p_dep_pos.pp.chol, CholeskyPivoted) @test rank(m2p_dep_pos.pp.chol) == rank(m2p.pp.chol) @test isapprox(deviance(m2p_dep_pos), deviance(m2p)) @@ -167,6 +166,16 @@ end @test isapprox(coef(m2p_dep_pos_kw), coef(m2p)) end +@testset "Passing wts (depwarn)" begin + df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3], wts = [3,3,3]) + @test_logs (:warn, "Passing weights as vector is deprecated in favor of explicitely using " * + "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * + "by coercing wts to `FrequencyWeights`") lm(@formula(y~x), df; wts=wts) + @test_logs (:warn, "Passing weights as vector is deprecated in favor of explicitely using " * + "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * + "by coercing wts to `FrequencyWeights`") glm(@formula(y~x), Normal(), IdentityLink(), df; wts=wts) +end + @testset "saturated linear model" begin df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3]) model = lm(@formula(y ~ x), df) @@ -1206,14 +1215,14 @@ end glm4 = glm(view(x, :, :), view(y, :), Binomial()) @test coef(glm1) == coef(glm2) == coef(glm3) == coef(glm4) - glm5 = glm(x, y, Binomial(), wts=w) - glm6 = glm(x, view(y, :), Binomial(), wts=w) - glm7 = glm(view(x, :, :), y, Binomial(), wts=w) - glm8 = glm(view(x, :, :), view(y, :), Binomial(), wts=w) - glm9 = glm(x, y, Binomial(), wts=view(w, :)) - glm10 = glm(x, view(y, :), Binomial(), wts=view(w, :)) - glm11 = glm(view(x, :, :), y, Binomial(), wts=view(w, :)) - glm12 = glm(view(x, :, :), view(y, :), Binomial(), wts=view(w, :)) + glm5 = glm(x, y, Binomial(), wts=fweights(w)) + glm6 = glm(x, view(y, :), Binomial(), wts=fweights(w)) + glm7 = glm(view(x, :, :), y, Binomial(), wts=fweights(w)) + glm8 = glm(view(x, :, :), view(y, :), Binomial(), wts=fweights(w)) + glm9 = glm(x, y, Binomial(), wts=fweights(view(w, :))) + glm10 = glm(x, view(y, :), Binomial(), wts=fweights(view(w, :))) + glm11 = glm(view(x, :, :), y, Binomial(), wts=fweights(view(w, :))) + glm12 = glm(view(x, :, :), view(y, :), Binomial(), wts=fweights(view(w, :))) @test coef(glm5) == coef(glm6) == coef(glm7) == coef(glm8) == coef(glm9) == coef(glm10) == coef(glm11) == coef(glm12) end From 84cd9901a6b71fce43446f504624c5727821226f Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 22 Jun 2022 12:09:12 +0200 Subject: [PATCH 7/8] Add depwarn for passing wts with Vector --- src/glmfit.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 277c82ad..909d3210 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -474,7 +474,6 @@ function fit(::Type{M}, wts::AbstractVector{<:Real} = uweights(length(y)), offset::AbstractVector{<:Real} = similar(y, 0), fitargs...) where {M<:AbstractGLM} - println("got you") # Check that X and y have the same number of observations if size(X, 1) != size(y, 1) throw(DimensionMismatch("number of rows in X and y must match")) @@ -483,7 +482,9 @@ function fit(::Type{M}, _wts = if isa(wts, AbstractWeights) wts elseif isa(wts, AbstractVector) - Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding by coercing wts to `FrequencyWeights`", :fit) + Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " * + "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * + "by coercing wts to `FrequencyWeights`", :fit) fweights(wts) else throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights")) From cbc329f9e0e865b18f3a0a35baad2df7aed2a48f Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 22 Jun 2022 13:00:39 +0200 Subject: [PATCH 8/8] Cosmettic changes --- src/glmfit.jl | 17 ++++++++--------- src/linpred.jl | 51 +++++++++++++++++++++++++------------------------- src/lm.jl | 21 ++++++++++----------- 3 files changed, 43 insertions(+), 46 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 909d3210..94058ef8 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -17,7 +17,7 @@ struct GlmResp{V<:FPVector, D<:UnivariateDistribution,L<:Link,W<:AbstractWeights mu::V "`offset:` offset added to `Xβ` to form `eta`. Can be of length 0" offset::V - "`wts:` prior case weights. Can be of length 0." + "`wts`: prior case weights. Can be of length 0." wts::W "`wrkwt`: working case weights for the Iteratively Reweighted Least Squares (IRLS) algorithm" wrkwt::V @@ -372,7 +372,7 @@ function StatsBase.fit!(m::AbstractGLM; if haskey(kwargs, :tol) Base.depwarn("`tol` argument is deprecated, use `atol` and `rtol` instead", :fit!) rtol = kwargs[:tol] - end + end _fit!(m, verbose, maxiter, minstepfac, atol, rtol, start) end @@ -438,9 +438,9 @@ const FIT_GLM_DOC = """ # Keyword Arguments - `dofit::Bool=true`: Determines whether model will be fit - - `wts::AbstractWeights=aweights(similar(y,0))`: Weights of observations. - Allowed weights are `AnalyticalWeights`, `FrequencyWeights`, or `ProbabilityWeights`. - If a vector is passed (deprecated) it is coerced to FrequencyWeights. + - `wts::AbstractWeights=aweights(similar(y,0))`: Weights of observations. + Allowed weights are `AnalyticalWeights`, `FrequencyWeights`, or `ProbabilityWeights`. + If a vector is passed (deprecated) it is coerced to FrequencyWeights. Can be length 0 to indicate no weighting (default). - `offset::Vector=similar(y,0)`: offset added to `Xβ` to form `eta`. Can be of length 0 @@ -482,8 +482,8 @@ function fit(::Type{M}, _wts = if isa(wts, AbstractWeights) wts elseif isa(wts, AbstractVector) - Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " * - "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * + Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " * + "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * "by coercing wts to `FrequencyWeights`", :fit) fweights(wts) else @@ -499,7 +499,7 @@ fit(::Type{M}, y::AbstractVector, d::UnivariateDistribution, l::Link=canonicallink(d); kwargs...) where {M<:AbstractGLM} = - fit(M, float(X), float(y), d, l; kwargs...) + fit(M, float(X), float(y), d, l; kwargs...) """ glm(formula, data, @@ -669,4 +669,3 @@ nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} = isempty(r.wts) ? oftype(sum nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = isempty(r.wts) ? oftype(sum(one(eltype(r.wts))), length(r.y)) : r.wts.sum nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W} = oftype(sum(one(eltype(r.wts))), length(r.y)) - diff --git a/src/linpred.jl b/src/linpred.jl index 3cd46761..441f1211 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -1,5 +1,5 @@ """ -linpred!(out, p::LinPred, f::Real=1.0) + linpred!(out, p::LinPred, f::Real=1.0) Overwrite `out` with the linear predictor from `p` with factor `f` @@ -11,14 +11,14 @@ function linpred!(out, p::LinPred, f::Real=1.) end """ -linpred(p::LinPred, f::Real=1.0) + linpred(p::LinPred, f::Real=1.0) Return the linear predictor `p.X * (p.beta0 .+ f * p.delbeta)` """ linpred(p::LinPred, f::Real=1.) = linpred!(Vector{eltype(p.X)}(undef, size(p.X, 1)), p, f) """ -installbeta!(p::LinPred, f::Real=1.0) + installbeta!(p::LinPred, f::Real=1.0) Install `pbeta0 .+= f * p.delbeta` and zero out `p.delbeta`. Return the updated `p.beta0`. """ @@ -33,7 +33,7 @@ function installbeta!(p::LinPred, f::Real=1.) end """ -DensePredQR + DensePredQR A `LinPred` type with a dense, unpivoted QR decomposition of `X` @@ -122,16 +122,15 @@ function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Re T = eltype(F) F = pivot ? pivoted_cholesky!(F, tol = -one(T), check = false) : cholesky!(F) DensePredChol(Matrix{T}(X), - Matrix{T}(Xw), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - F, - wts, - similar(X, T), - similar(cholfactors(F)), - similar(X, T, (size(X,1),)) - ) + Matrix{T}(Xw), + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + F, + wts, + similar(X, T), + similar(cholfactors(F)), + similar(X, T, (size(X,1),))) end cholpred(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) = DensePredChol(X, pivot, wts) @@ -227,14 +226,14 @@ function SparsePredChol(X::SparseMatrixCSC{T}, wts::AbstractVector) where T sqrtwts = sqrt.(wts) Xw = isempty(wts) ? SparseMatrixCSC(I, 0, 0) : sqrtwts.*X return SparsePredChol{eltype(X),typeof(X),typeof(chol), typeof(wts)}(X, - Xw, - X', - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - chol, - similar(X), - wts) + Xw, + X', + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + chol, + similar(X), + wts) end cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X, uweights(size(X,1))) @@ -290,7 +289,7 @@ end invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), size(x.X, 2)) -function vcov(x::LinPredModel) +function vcov(x::LinPredModel) d = dispersion(x, true) B = _covm(x.pp) rmul!(B, dispersion(x, true)) @@ -298,7 +297,7 @@ end _covm(pp::LinPred) = invchol(pp) -function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}) where {T} +function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}) where {T} wts = pp.wts Z = pp.scratchm1 .= pp.X.*wts XtW2X = Z'Z @@ -306,7 +305,7 @@ function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}) where {T} invXtWX*XtW2X*invXtWX end -function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:CholeskyPivoted}) where {T} +function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:CholeskyPivoted}) where {T} wts = pp.wts Z = pp.scratchm1 .= pp.X.*wts rnk = rank(pp.chol) @@ -334,7 +333,7 @@ end modelframe(obj::LinPredModel) = obj.fr -function modelmatrix(obj::LinPredModel; weighted=false) +function modelmatrix(obj::LinPredModel; weighted=false) if !weighted obj.pp.X elseif !isempty(weights(obj)) diff --git a/src/lm.jl b/src/lm.jl index b49d6d8d..42a419c5 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -67,14 +67,14 @@ end function nullloglikelihood(r::LmResp) n = isempty(r.wts) ? length(r.y) : sum(r.wts) - -n/2 * (log(2π * nulldeviance(r)/n) + 1) + -n/2 * (log(2π * nulldeviance(r)/n) + 1) end -function residuals(r::LmResp; weighted=false) +function residuals(r::LmResp; weighted=false) wts = weights(r) res = r.y - r.mu - if !weighted - res + if !weighted + res elseif !isempty(wts) sqrt.(wts).*res else @@ -118,14 +118,14 @@ const FIT_LM_DOC = """ values of the dependent variable. The keyword argument `wts` can be an `AbstractWeights` specifying frequency weights for observations. - Weights allowed are: + Weights allowed are: - `AnalyticaWeights`: describe a non-random relative importance (usually between 0 and 1) for each observation. - `FrequencyWeights`: describe the number of times (or frequency) each observation was observed. - `ProbabilityWeights`: represent the inverse of the sampling probability for each observation, providing a correction mechanism for under- or over-sampling certain population groups - These weights gives equal point estimates but different standard errors. - If a vector is passed (deprecated), it is coerced to `FrequencyWeights`. + These weights gives equal point estimates but different standard errors. + If a vector is passed (deprecated), it is coerced to `FrequencyWeights`. `dropcollinear` controls whether or not `lm` accepts a model matrix which is less-than-full rank. If `true` (the default), only the first of each set of @@ -151,19 +151,18 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{< @warn "Positional argument `allowrankdeficient` is deprecated, use keyword " * "argument `dropcollinear` instead. Proceeding with positional argument value: $allowrankdeficient_dep" dropcollinear = allowrankdeficient_dep - end + end # For backward compatibility accept wts as AbstractArray and coerce them to FrequencyWeights _wts = if isa(wts, AbstractWeights) wts elseif isa(wts, AbstractVector) - Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " * - "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * + Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " * + "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * "by coercing wts to `FrequencyWeights`", :fit) fweights(wts) else throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights")) end - fit!(LinearModel(LmResp(y, _wts), cholpred(X, dropcollinear, _wts))) end