From 1754cbd9ce7265e5c8426a4a1630f70145a25220 Mon Sep 17 00:00:00 2001
From: Giuseppe Ragusa <giuseppe.ragusa@gmail.com>
Date: Fri, 10 Jun 2022 20:53:44 +0200
Subject: [PATCH 1/8] WIP

---
 src/scratch.jl | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 src/scratch.jl

diff --git a/src/scratch.jl b/src/scratch.jl
new file mode 100644
index 00000000..561ecb22
--- /dev/null
+++ b/src/scratch.jl
@@ -0,0 +1,20 @@
+using GLM
+using DataFrames
+
+y = rand(10)
+x = rand(10,2)
+wts = rand(10)
+df = DataFrame(x, :auto)
+df.y = y
+df.wts = wts
+lm1 = lm(x,y)
+lmw = lm(x,y; wts = wts)
+lmf = lm(@formula(y~x1+x2), df)
+lmfw = lm(@formula(y~x1+x2), df; wts = wts)
+glm(x, y)
+
+
+cooksdistance(lm)
+
+
+

From 1d778a5b37013fc3333ba7b9f43b5a25ece29f23 Mon Sep 17 00:00:00 2001
From: Giuseppe Ragusa <giuseppe.ragusa@gmail.com>
Date: Wed, 15 Jun 2022 19:04:42 +0200
Subject: [PATCH 2/8] WIP

---
 src/GLM.jl       | 26 +++++++-------
 src/glmfit.jl    | 58 ++++++++++++++++++++----------
 src/linpred.jl   | 86 ++++++++++++++++++++++++++-------------------
 src/lm.jl        | 77 ++++++++++++++++++++++++----------------
 src/scratch.jl   | 65 +++++++++++++++++++++++++++++++---
 test/runtests.jl | 91 +++++++++++++++++++++++++++++++++++++++++++-----
 6 files changed, 294 insertions(+), 109 deletions(-)

diff --git a/src/GLM.jl b/src/GLM.jl
index 019f80e3..8625a48b 100644
--- a/src/GLM.jl
+++ b/src/GLM.jl
@@ -12,14 +12,14 @@ module GLM
     import Statistics: cor
     import StatsBase: coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual,
                       loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict,
-                      fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², PValue
+                      fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², PValue, weights
     import StatsFuns: xlogy
     import SpecialFunctions: erfc, erfcinv, digamma, trigamma
     import StatsModels: hasintercept
     export coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual,
            loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict,
            fitted, fit, fit!, model_response, response, modelmatrix, r2, r², adjr2, adjr²,
-           cooksdistance, hasintercept
+           cooksdistance, hasintercept, weights, AnalyticWeights, ProbabilityWeights, FrequencyWeights
 
     export
         # types
@@ -52,17 +52,17 @@ module GLM
         LinearModel,
 
         # functions
-        canonicallink,  # canonical link function for a distribution
-        deviance,       # deviance of fitted and observed responses
-        devresid,       # vector of squared deviance residuals
-        formula,        # extract the formula from a model
-        glm,            # general interface
-        linpred,        # linear predictor
-        lm,             # linear model
-        negbin,         # interface to fitting negative binomial regression
-        nobs,           # total number of observations
-        predict,        # make predictions
-        ftest           # compare models with an F test
+        canonicallink,     # canonical link function for a distribution
+        deviance,          # deviance of fitted and observed responses
+        devresid,          # vector of squared deviance residuals
+        formula,           # extract the formula from a model
+        glm,               # general interface
+        linpred,           # linear predictor
+        lm,                # linear model
+        negbin,            # interface to fitting negative binomial regression
+        nobs,              # total number of observations
+        predict,           # make predictions
+        ftest              # compare models with an F test
 
     const FP = AbstractFloat
     const FPVector{T<:FP} = AbstractArray{T,1}
diff --git a/src/glmfit.jl b/src/glmfit.jl
index 0c108296..942ffc18 100644
--- a/src/glmfit.jl
+++ b/src/glmfit.jl
@@ -3,7 +3,7 @@
 
 The response vector and various derived vectors in a generalized linear model.
 """
-struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link} <: ModResp
+struct GlmResp{V<:FPVector, D<:UnivariateDistribution,L<:Link,W<:AbstractWeights{<:Real}} <: ModResp
     "`y`: response vector"
     y::V
     d::D
@@ -18,14 +18,14 @@ struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link} <: ModResp
     "`offset:` offset added to `Xβ` to form `eta`.  Can be of length 0"
     offset::V
     "`wts:` prior case weights.  Can be of length 0."
-    wts::V
+    wts::W
     "`wrkwt`: working case weights for the Iteratively Reweighted Least Squares (IRLS) algorithm"
     wrkwt::V
     "`wrkresid`: working residuals for IRLS"
     wrkresid::V
 end
 
-function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V) where {V<:FPVector, D, L}
+function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVector, D, L, W}
     n  = length(y)
     nη = length(η)
     nμ = length(μ)
@@ -48,14 +48,23 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V) where {V<:FPVec
         throw(DimensionMismatch("offset must have length $n or length 0 but was $lo"))
     end
 
-    return GlmResp{V,D,L}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y))
+    return GlmResp{V,D,L,W}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y))
 end
 
-function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::FPVector)
+function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::AbstractVector{<:Real})
     # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly
     _y   = convert(Vector{float(eltype(y))}, y)
     _off = convert(Vector{float(eltype(off))}, off)
-    _wts = convert(Vector{float(eltype(wts))}, wts)
+    _wts = if wts === nothing
+        ## This should be removed - here for allowing
+        ## passing a vector (deprecated) 
+        aweights(similar(_y, 0))
+    elseif isa(wts, AbstractWeights)   
+        wts
+    elseif isa(wts, AbstractVector)
+        ## for backward compatibility
+        fweights(wts)
+    end
     η    = similar(_y)
     μ    = similar(_y)
     r    = GlmResp(_y, d, l, η, μ, _off, _wts)
@@ -64,13 +73,12 @@ function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::FPVe
     return r
 end
 
-function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real},
-                 wts::AbstractVector{<:Real}) where {D, L}
-        GlmResp(float(y), d, l, float(off), float(wts))
+function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, wts::AbstractVector{<:Real}) where {D, L}
+    GlmResp(float(y), d, l, float(off), wts)
 end
 
 deviance(r::GlmResp) = sum(r.devresid)
-
+weights(r::GlmResp) = r.wts
 """
     cancancel(r::GlmResp{V,D,L})
 
@@ -374,7 +382,7 @@ function StatsBase.fit!(m::AbstractGLM;
     if haskey(kwargs, :tol)
         Base.depwarn("`tol` argument is deprecated, use `atol` and `rtol` instead", :fit!)
         rtol = kwargs[:tol]
-    end
+    end    
 
     _fit!(m, verbose, maxiter, minstepfac, atol, rtol, start)
 end
@@ -440,12 +448,9 @@ const FIT_GLM_DOC = """
 
     # Keyword Arguments
     - `dofit::Bool=true`: Determines whether model will be fit
-    - `wts::Vector=similar(y,0)`: Prior frequency (a.k.a. case) weights of observations.
-      Such weights are equivalent to repeating each observation a number of times equal
-      to its weight. Do note that this interpretation gives equal point estimates but
-      different standard errors from analytical (a.k.a. inverse variance) weights and
-      from probability (a.k.a. sampling) weights which are the default in some other
-      software.
+    - `wts::AbstractWeights=aweights(similar(y,0))`: Weights of observations. 
+      Allowed weights are `AnalyticalWeights`, `FrequencyWeights`, or `ProbabilityWeights`.        
+      If a vector is passed (deprecated) it is coerced to FrequencyWeights. 
       Can be length 0 to indicate no weighting (default).
     - `offset::Vector=similar(y,0)`: offset added to `Xβ` to form `eta`.  Can be of
       length 0
@@ -476,7 +481,7 @@ function fit(::Type{M},
     d::UnivariateDistribution,
     l::Link = canonicallink(d);
     dofit::Bool = true,
-    wts::AbstractVector{<:Real}      = similar(y, 0),
+    wts::Union{AbstractWeights{<:Real}, AbstractVector{<:Real}} = aweights(similar(y, 0)),
     offset::AbstractVector{<:Real}   = similar(y, 0),
     fitargs...) where {M<:AbstractGLM}
 
@@ -537,6 +542,7 @@ function dispersion(m::AbstractGLM, sqr::Bool=false)
     end
 end
 
+
 """
     predict(mm::AbstractGLM, newX::AbstractMatrix; offset::FPVector=eltype(newX)[],
             interval::Union{Symbol,Nothing}=nothing, level::Real = 0.95,
@@ -649,3 +655,19 @@ function checky(y, d::Binomial)
     end
     return nothing
 end
+
+"""
+    nobs(obj::LinearModel)
+    nobs(obj::GLM)
+
+For linear and generalized linear models, returns the number of rows, or,
+when prior weights of type FrequencyWeights are specified, the sum of weights.
+"""
+nobs(obj::LinPredModel) = nobs(obj.rr)
+
+nobs(r::LmResp{V,W}) where {V,W} = oftype(sum(one(eltype(r.wts))), length(r.y))
+nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} = r.wts.sum
+
+nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = r.wts.sum
+nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W} = oftype(sum(one(eltype(r.wts))), length(r.y))
+
diff --git a/src/linpred.jl b/src/linpred.jl
index 4274f575..553fb503 100644
--- a/src/linpred.jl
+++ b/src/linpred.jl
@@ -1,5 +1,5 @@
 """
-    linpred!(out, p::LinPred, f::Real=1.0)
+linpred!(out, p::LinPred, f::Real=1.0)
 
 Overwrite `out` with the linear predictor from `p` with factor `f`
 
@@ -11,14 +11,14 @@ function linpred!(out, p::LinPred, f::Real=1.)
 end
 
 """
-    linpred(p::LinPred, f::Real=1.0)
+linpred(p::LinPred, f::Real=1.0)
 
 Return the linear predictor `p.X * (p.beta0 .+ f * p.delbeta)`
 """
 linpred(p::LinPred, f::Real=1.) = linpred!(Vector{eltype(p.X)}(undef, size(p.X, 1)), p, f)
 
 """
-    installbeta!(p::LinPred, f::Real=1.0)
+installbeta!(p::LinPred, f::Real=1.0)
 
 Install `pbeta0 .+= f * p.delbeta` and zero out `p.delbeta`.  Return the updated `p.beta0`.
 """
@@ -33,7 +33,7 @@ function installbeta!(p::LinPred, f::Real=1.)
 end
 
 """
-    DensePredQR
+DensePredQR
 
 A `LinPred` type with a dense, unpivoted QR decomposition of `X`
 
@@ -66,7 +66,7 @@ DensePredQR(X::Matrix{T}) where T = DensePredQR{T}(X, zeros(T, size(X,2)))
 convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X, zeros(T, size(X, 2)))
 
 """
-    delbeta!(p::LinPred, r::Vector)
+delbeta!(p::LinPred, r::Vector)
 
 Evaluate and return `p.delbeta` the increment to the coefficient vector from residual `r`
 """
@@ -78,7 +78,7 @@ function delbeta!(p::DensePredQR{T}, r::Vector{T}) where T<:BlasReal
 end
 
 """
-    DensePredChol{T}
+DensePredChol{T}
 
 A `LinPred` type with a dense Cholesky factorization of `X'X`
 
@@ -106,12 +106,12 @@ function DensePredChol(X::AbstractMatrix, pivot::Bool)
     T = eltype(F)
     F = pivot ? pivoted_cholesky!(F, tol = -one(T), check = false) : cholesky!(F)
     DensePredChol(Matrix{T}(X),
-        zeros(T, size(X, 2)),
-        zeros(T, size(X, 2)),
-        zeros(T, size(X, 2)),
-        F,
-        similar(X, T),
-        similar(cholfactors(F)))
+    zeros(T, size(X, 2)),
+    zeros(T, size(X, 2)),
+    zeros(T, size(X, 2)),
+    F,
+    similar(X, T),
+    similar(cholfactors(F)))
 end
 
 cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot)
@@ -177,12 +177,12 @@ end
 function SparsePredChol(X::SparseMatrixCSC{T}) where T
     chol = cholesky(sparse(I, size(X, 2), size(X,2)))
     return SparsePredChol{eltype(X),typeof(X),typeof(chol)}(X,
-        X',
-        zeros(T, size(X, 2)),
-        zeros(T, size(X, 2)),
-        zeros(T, size(X, 2)),
-        chol,
-        similar(X))
+    X',
+    zeros(T, size(X, 2)),
+    zeros(T, size(X, 2)),
+    zeros(T, size(X, 2)),
+    chol,
+    similar(X))
 end
 
 cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X)
@@ -220,6 +220,7 @@ function invchol(x::DensePredChol{T,<: CholeskyPivoted}) where T
     res[ipiv, ipiv]
 end
 invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), size(x.X, 2))
+## For ProbabilityWeights the variance is diferent
 vcov(x::LinPredModel) = rmul!(invchol(x.pp), dispersion(x, true))
 
 function cor(x::LinPredModel)
@@ -235,28 +236,42 @@ function show(io::IO, obj::LinPredModel)
 end
 
 modelframe(obj::LinPredModel) = obj.fr
-modelmatrix(obj::LinPredModel) = obj.pp.X
+
+function modelmatrix(obj::LinPredModel; weighted=false)  
+    wts = weights(obj)
+    X = obj.pp.X
+    if !weighted 
+        X
+    elseif !isempty(wts)
+        wts_times_X(X, wts)
+    else
+        throw(ArgumentError("`weighted=true` allowed only for weighted models."))
+    end
+end
+
+function wts_times_X(X::AbstractSparseMatrix, wts::AbstractArray)
+    Z = copy(X)
+    rows = rowvals(Z)
+    vals = nonzeros(Z)
+    m, n = size(Z)
+    for j = 1:n
+        for i in nzrange(Z, j)
+            r = rows[i]         
+            vals[i] *= sqrt(wts[r])
+        end
+    end 
+    return Z
+end
+
+wts_times_X(X::AbstractMatrix, wts::AbstractArray) = sqrt.(wts).*X
+    
 response(obj::LinPredModel) = obj.rr.y
 
 fitted(m::LinPredModel) = m.rr.mu
 predict(mm::LinPredModel) = fitted(mm)
 StatsModels.formula(obj::LinPredModel) = modelframe(obj).formula
-residuals(obj::LinPredModel) = residuals(obj.rr)
-
-"""
-    nobs(obj::LinearModel)
-    nobs(obj::GLM)
-
-For linear and generalized linear models, returns the number of rows, or,
-when prior weights are specified, the sum of weights.
-"""
-function nobs(obj::LinPredModel)
-    if isempty(obj.rr.wts)
-        oftype(sum(one(eltype(obj.rr.wts))), length(obj.rr.y))
-    else
-        sum(obj.rr.wts)
-    end
-end
+residuals(obj::LinPredModel; kwarg...) = residuals(obj.rr; kwarg...)
+weights(obj::LinPredModel) = weights(obj.rr)
 
 coef(x::LinPred) = x.beta0
 coef(obj::LinPredModel) = coef(obj.pp)
@@ -264,3 +279,4 @@ coef(obj::LinPredModel) = coef(obj.pp)
 dof_residual(obj::LinPredModel) = nobs(obj) - dof(obj) + 1
 
 hasintercept(m::LinPredModel) = any(i -> all(==(1), view(m.pp.X , :, i)), 1:size(m.pp.X, 2))
+    
\ No newline at end of file
diff --git a/src/lm.jl b/src/lm.jl
index ed11f450..994ac895 100644
--- a/src/lm.jl
+++ b/src/lm.jl
@@ -7,46 +7,48 @@ Encapsulates the response for a linear model
 
 - `mu`: current value of the mean response vector or fitted value
 - `offset`: optional offset added to the linear predictor to form `mu`
-- `wts`: optional vector of prior frequency (a.k.a. case) weights for observations
+- `wts`: optional weights for observations (AbstractWeights)
 - `y`: observed response vector
 
 Either or both `offset` and `wts` may be of length 0
 """
-mutable struct LmResp{V<:FPVector} <: ModResp  # response in a linear model
+mutable struct LmResp{V<:FPVector, W<:Union{AbstractWeights{<:Real}, AbstractVector{<:Real}}} <: ModResp  # response in a linear model
     mu::V                                  # mean response
     offset::V                              # offset added to linear predictor (may have length 0)
-    wts::V                                 # prior weights (may have length 0)
+    wts::W                                 # prior weights (may have length 0)
     y::V                                   # response
-    function LmResp{V}(mu::V, off::V, wts::V, y::V) where V
+    function LmResp{V, W}(mu::V, off::V, wts::W, y::V) where {V, W}
         n = length(y)
         length(mu) == n || error("mismatched lengths of mu and y")
         ll = length(off)
         ll == 0 || ll == n || error("length of offset is $ll, must be $n or 0")
         ll = length(wts)
         ll == 0 || ll == n || error("length of wts is $ll, must be $n or 0")
-        new{V}(mu, off, wts, y)
+        new{V,W}(mu, off, wts, y)
     end
 end
 
-function LmResp(y::AbstractVector{<:Real}, wts::Union{Nothing,AbstractVector{<:Real}}=nothing)
+function LmResp(y::AbstractVector{<:Real}, wts::Union{Nothing,AbstractVector{<:Real}, AbstractWeights{<:Real}}=nothing)
     # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly
     _y = convert(Vector{float(eltype(y))}, y)
-    _wts = if wts === nothing
-        similar(_y, 0)
+    _wts = if wts === nothing         
+        aweights(similar(_y, 0))
+    elseif isa(wts, Vector)
+        fweights(wts)
     else
-        convert(Vector{float(eltype(wts))}, wts)
+        wts
     end
-    return LmResp{typeof(_y)}(zero(_y), zero(_y), _wts, _y)
+    return LmResp{typeof(_y), typeof(_wts)}(zero(_y), zero(_y), _wts, _y)
 end
 
-function updateμ!(r::LmResp{V}, linPr::V) where V<:FPVector
+function updateμ!(r::LmResp{V, W}, linPr::V) where {V<:FPVector, W}
     n = length(linPr)
     length(r.y) == n || error("length(linPr) is $n, should be $(length(r.y))")
     length(r.offset) == 0 ? copyto!(r.mu, linPr) : broadcast!(+, r.mu, linPr, r.offset)
     deviance(r)
 end
 
-updateμ!(r::LmResp{V}, linPr) where {V<:FPVector} = updateμ!(r, convert(V, vec(linPr)))
+updateμ!(r::LmResp{V, W}, linPr) where {V<:FPVector, W} = updateμ!(r, convert(V, vec(linPr)))
 
 function deviance(r::LmResp)
     y = r.y
@@ -97,7 +99,19 @@ function nullloglikelihood(r::LmResp)
     -n/2 * (log(2π * nulldeviance(r)/n) + 1) 
 end
 
-residuals(r::LmResp) = r.y - r.mu
+function residuals(r::LmResp; weighted=false) 
+    wts = weights(r)
+    res = r.y - r.mu
+    if !weighted 
+        res 
+    elseif !isempty(wts)
+        sqrt.(wts).*res
+    else
+        throw(ArgumentError("`weighted=true` allowed only for weighted models."))
+    end
+end
+
+weights(r::LmResp) = r.wts
 
 """
     LinearModel
@@ -120,7 +134,7 @@ function StatsBase.fit!(obj::LinearModel)
     if isempty(obj.rr.wts)
         delbeta!(obj.pp, obj.rr.y)
     else 
-        delbeta!(obj.pp, obj.rr.y, obj.rr.wts)
+        delbeta!(obj.pp, obj.rr.y, convert(Vector{eltype(obj.rr.y)}, obj.rr.wts))
     end
     installbeta!(obj.pp)     
     updateμ!(obj.rr, linpred(obj.pp, zero(eltype(obj.rr.y))))
@@ -135,12 +149,15 @@ const FIT_LM_DOC = """
     in columns (including if appropriate the intercept), and `y` must be a vector holding
     values of the dependent variable.
 
-    The keyword argument `wts` can be a `Vector` specifying frequency weights for observations.
-    Such weights are equivalent to repeating each observation a number of times equal
-    to its weight. Do note that this interpretation gives equal point estimates but
-    different standard errors from analytical (a.k.a. inverse variance) weights and
-    from probability (a.k.a. sampling) weights which are the default in some other
-    software.
+    The keyword argument `wts` can be an `AbstractWeights` specifying frequency weights for observations.
+    Weights allowed are: 
+    - `AnalyticaWeights`: describe a non-random relative importance (usually between 0 and 1)
+    for each observation.
+    - `FrequencyWeights`: describe the number of times (or frequency) each observation was observed.
+    - `ProbabilityWeights`: represent the inverse of the sampling probability for each observation,
+    providing a correction mechanism for under- or over-sampling certain population groups
+    These weights gives equal point estimates but different standard errors. 
+    If a vector is passed (deprecated), it is coerced to `FrequencyWeights`. 
 
     `dropcollinear` controls whether or not `lm` accepts a model matrix which
     is less-than-full rank. If `true` (the default), only the first of each set of
@@ -166,6 +183,9 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{<
         @warn "Positional argument `allowrankdeficient` is deprecated, use keyword " *
               "argument `dropcollinear` instead. Proceeding with positional argument value: $allowrankdeficient_dep"
         dropcollinear = allowrankdeficient_dep
+    end    
+    if isa(wts, Vector)
+        Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights.", :fit)
     end
     fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear)))
 end
@@ -206,6 +226,7 @@ nullloglikelihood(obj::LinearModel) = nullloglikelihood(obj.rr)
 
 r2(obj::LinearModel) = 1 - deviance(obj)/nulldeviance(obj)
 
+
 function adjr2(obj::LinearModel)
     n = nobs(obj)
     # dof() includes the dispersion parameter
@@ -299,19 +320,15 @@ of each data point.
 Currently only implemented for linear models without weights.
 """
 function StatsBase.cooksdistance(obj::LinearModel)
-    u = residuals(obj)
-    mse = dispersion(obj,true)
+    wts = weights(obj)
+    u = residuals(obj; weighted=!isempty(wts))
+    mse = GLM.dispersion(obj,true)
     k = dof(obj)-1
     d_res = dof_residual(obj)
-    X = modelmatrix(obj)
-    XtX = crossmodelmatrix(obj)
+    X = modelmatrix(obj; weighted=!isempty(wts))
+    XtX = crossmodelmatrix(obj; weighted=!isempty(wts))
     k == size(X,2) || throw(ArgumentError("Models with collinear terms are not currently supported."))
-    wts = obj.rr.wts
-    if isempty(wts)
-        hii = diag(X * inv(XtX) * X')
-    else
-        throw(ArgumentError("Weighted models are not currently supported."))
-    end
+    hii = diag(X * inv(XtX) * X')
     D = @. u^2 * (hii / (1 - hii)^2) / (k*mse)
     return D
 end
diff --git a/src/scratch.jl b/src/scratch.jl
index 561ecb22..4f2612b0 100644
--- a/src/scratch.jl
+++ b/src/scratch.jl
@@ -1,5 +1,10 @@
 using GLM
 using DataFrames
+using Random
+using CSV
+using StatsBase
+using RDatasets
+Random.seed!(11)
 
 y = rand(10)
 x = rand(10,2)
@@ -9,12 +14,64 @@ df.y = y
 df.wts = wts
 lm1 = lm(x,y)
 lmw = lm(x,y; wts = wts)
-lmf = lm(@formula(y~x1+x2), df)
-lmfw = lm(@formula(y~x1+x2), df; wts = wts)
-glm(x, y)
+lmf = lm(@formula(y~x1+x2-1), df)
+lmfw = lm(@formula(y~-1+x1+x2), df; wts = aweights(wts))
+lmfw = lm(@formula(y~-1+x1+x2), df; wts = pweights(wts))
+lmfw = lm(@formula(y~-1+x1+x2), df; wts = fweights(wts))
 
+glm(@formula(y~-1+x1+x2), df, Normal, IdentityLink; wts = fweights(wts))
 
-cooksdistance(lm)
+cooksdistance(lm1)
 
 
 
+df = dataset("quantreg", "engel")
+N = nrow(df)
+df.weights = repeat(1:5, Int(N/5))
+f = @formula(FoodExp ~ Income)
+lm_model = lm(f, df, wts = FrequencyWeights(df.weights))
+glm_model = glm(f, df, Normal(), wts = FrequencyWeights(df.weights))
+@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505])
+@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505])
+@test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968])
+@test isapprox(r2(lm_model), 0.8330258148644486)
+@test isapprox(adjr2(lm_model), 0.832788298242634)
+@test isapprox(vcov(lm_model), [88.02760245551447 -0.06772589439264813; 
+                                -0.06772589439264813 6.670664781664879e-5])
+@test isapprox(first(predict(lm_model)), 357.57694841780994)
+@test isapprox(loglikelihood(lm_model), -4353.946729075838)
+@test isapprox(loglikelihood(glm_model), -4353.946729075838)
+@test isapprox(nullloglikelihood(lm_model), -4984.892139711452)
+@test isapprox(mean(residuals(lm_model)), -5.412966629787718) 
+
+lm_model = lm(f, df, wts = df.weights)
+glm_model = glm(f, df, Normal(), wts = df.weights)
+@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505])
+@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505])
+@test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968])
+@test isapprox(r2(lm_model), 0.8330258148644486)
+@test isapprox(adjr2(lm_model), 0.832788298242634)
+@test isapprox(vcov(lm_model), [88.02760245551447 -0.06772589439264813; 
+                                -0.06772589439264813 6.670664781664879e-5])
+@test isapprox(first(predict(lm_model)), 357.57694841780994)
+@test isapprox(loglikelihood(lm_model), -4353.946729075838)
+@test isapprox(loglikelihood(glm_model), -4353.946729075838)
+@test isapprox(nullloglikelihood(lm_model), -4984.892139711452)
+@test isapprox(mean(residuals(lm_model)), -5.412966629787718) 
+
+
+
+lm_model = lm(f, df, wts = aweights(df.weights))
+glm_model = glm(f, df, Normal(), wts = aweights(df.weights))
+@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505])
+@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505])
+@test isapprox(stderror(lm_model), [16.297055281313032, 0.014186793927918842])
+@test isapprox(r2(lm_model), 0.8330258148644486)
+@test isapprox(adjr2(lm_model), 0.8323091874604334)
+@test isapprox(vcov(lm_model), [265.59401084217296   -0.20434035947652907; 
+                                 -0.20434035947652907 0.00020126512195323495])
+@test isapprox(first(predict(lm_model)), 357.57694841780994)
+@test isapprox(loglikelihood(lm_model), -4353.946729075838)
+@test isapprox(loglikelihood(glm_model), -4353.946729075838)
+@test isapprox(nullloglikelihood(lm_model), -4984.892139711452)
+@test isapprox(mean(residuals(lm_model)), -5.412966629787718) 
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index e38c10f4..90bd33a0 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -83,12 +83,13 @@ end
 end
 
 @testset "linear model with weights" begin 
+
     df = dataset("quantreg", "engel")
     N = nrow(df)
     df.weights = repeat(1:5, Int(N/5))
     f = @formula(FoodExp ~ Income)
-    lm_model = lm(f, df, wts = df.weights)
-    glm_model = glm(f, df, Normal(), wts = df.weights)
+    lm_model = lm(f, df, wts = FrequencyWeights(df.weights))
+    glm_model = glm(f, df, Normal(), wts = FrequencyWeights(df.weights))
     @test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505])
     @test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505])
     @test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968])
@@ -101,6 +102,29 @@ end
     @test isapprox(loglikelihood(glm_model), -4353.946729075838)
     @test isapprox(nullloglikelihood(lm_model), -4984.892139711452)
     @test isapprox(mean(residuals(lm_model)), -5.412966629787718) 
+    
+    lm_model = lm(f, df, wts = df.weights)
+    glm_model = glm(f, df, Normal(), wts = df.weights)
+    @test isa(weights(lm_model), FrequencyWeights)
+    @test isa(weights(glm_model), FrequencyWeights)
+    
+    
+    
+    
+    lm_model = lm(f, df, wts = aweights(df.weights))
+    glm_model = glm(f, df, Normal(), wts = aweights(df.weights))
+    @test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505])
+    @test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505])
+    @test isapprox(stderror(lm_model), [16.297055281313032, 0.014186793927918842])
+    @test isapprox(r2(lm_model), 0.8330258148644486)
+    @test isapprox(adjr2(lm_model), 0.8323091874604334)
+    @test isapprox(vcov(lm_model), [265.59401084217296   -0.20434035947652907; 
+                                     -0.20434035947652907 0.00020126512195323495])
+    @test isapprox(first(predict(lm_model)), 357.57694841780994)
+    @test isapprox(loglikelihood(lm_model), -4353.946729075838)
+    @test isapprox(loglikelihood(glm_model), -4353.946729075838)
+    @test isapprox(nullloglikelihood(lm_model), -4984.892139711452)
+    @test isapprox(mean(residuals(lm_model)), -5.412966629787718) 
 end
 
 @testset "rankdeficient" begin
@@ -128,8 +152,9 @@ end
     @test all(isnan, hcat(coeftable(m2p).cols[2:end]...)[7,:])
 
     m2p_dep_pos = fit(LinearModel, Xmissingcell, ymissingcell, true)
-    @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " *
-                "argument `dropcollinear` instead. Proceeding with positional argument value: true") fit(LinearModel, Xmissingcell, ymissingcell, true)
+    @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " * "argument `dropcollinear` instead. Proceeding with positional argument value: true")  (:warn, "Passing weights as vector is deprecated in favor of explicitely using " * "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights.") fit(LinearModel, Xmissingcell, ymissingcell, true)
+    # @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " *
+    #             "argument `dropcollinear` instead. Proceeding with positional argument value: true") fit(LinearModel, Xmissingcell, ymissingcell, true)
     @test isa(m2p_dep_pos.pp.chol, CholeskyPivoted)
     @test rank(m2p_dep_pos.pp.chol) == rank(m2p.pp.chol)
     @test isapprox(deviance(m2p_dep_pos), deviance(m2p))
@@ -407,10 +432,10 @@ admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12],
                       admit = repeat([false, true], inner=[4]),
                       rank = categorical(repeat(1:4, outer=2)))
 
-@testset "Aggregated Binomial LogitLink" begin
+@testset "Aggregated Binomial LogitLink (FrequencyWeights)" begin
     for distr in (Binomial, Bernoulli)
         gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(),
-                   wts=Array(admit_agr.count))
+                   wts=fweights(admit_agr.count))
         @test dof(gm14) == 4
         @test nobs(gm14) == 400
         @test isapprox(deviance(gm14), 474.9667184280627)
@@ -421,8 +446,25 @@ admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12],
         @test isapprox(coef(gm14),
             [0.164303051291, -0.7500299832, -1.36469792994, -1.68672866457], atol=1e-5)
     end
+    
 end
 
+@testset "Aggregated Binomial LogitLink (AnalyticWeights)" begin
+    for distr in (Binomial, Bernoulli)
+        gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(),
+                   wts=aweights(admit_agr.count))
+        @test dof(gm14) == 4
+        @test nobs(gm14) == 8
+        @test isapprox(deviance(gm14), 474.9667184280627)
+        @test isapprox(loglikelihood(gm14), -237.48335921403134)
+        @test isapprox(aic(gm14), 482.96671842822883)
+        @test isapprox(aicc(gm14), 496.3000517613874)
+        @test isapprox(bic(gm14), 483.28448459477346)
+        @test isapprox(coef(gm14),
+            [0.164303051291, -0.7500299832, -1.36469792994, -1.68672866457], atol=1e-5)
+    end
+    
+end
 # Logistic regression using aggregated data with proportions of successes and weights
 admit_agr2 = DataFrame(Any[[61., 151, 121, 67], [33., 54, 28, 12], categorical(1:4)],
     [:count, :admit, :rank])
@@ -431,7 +473,7 @@ admit_agr2.p = admit_agr2.admit ./ admit_agr2.count
 ## The model matrix here is singular so tests like the deviance are just round off error
 @testset "Binomial LogitLink aggregated" begin
     gm15 = fit(GeneralizedLinearModel, @formula(p ~ rank), admit_agr2, Binomial(),
-               wts=admit_agr2.count)
+               wts=fweights(admit_agr2.count))
     test_show(gm15)
     @test dof(gm15) == 4
     @test nobs(gm15) == 400
@@ -446,7 +488,7 @@ end
 # Weighted Gamma example (weights are totally made up)
 @testset "Gamma InverseLink Weights" begin
     gm16 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(),
-               wts=[1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7])
+               wts=fweights([1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]))
     test_show(gm16)
     @test dof(gm16) == 3
     @test nobs(gm16) == 32.7
@@ -461,7 +503,7 @@ end
 # Weighted Poisson example (weights are totally made up)
 @testset "Poisson LogLink Weights" begin
     gm17 = fit(GeneralizedLinearModel, @formula(Counts ~ Outcome + Treatment), dobson, Poisson(),
-        wts = [1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7])
+        wts = fweights([1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]))
     test_show(gm17)
     @test dof(gm17) == 5
     @test isapprox(deviance(gm17), 17.699857821414266)
@@ -618,6 +660,37 @@ end
     end
 end
 
+@testset "Sparse LM (weighted)" begin
+    rng = StableRNG(1)
+    X = sprand(rng, 1000, 10, 0.01)
+    β = randn(rng, 10)
+    y = Bool[rand(rng) < logistic(x) for x in X * β]
+    wts = rand(1000)
+    gmsparsev = [fit(LinearModel, X, y; wts=fweights(wts)),
+                 fit(LinearModel, X, sparse(y); wts=fweights(wts)),
+                 fit(LinearModel, Matrix(X), sparse(y); wts=fweights(wts))]
+    gmdense = fit(LinearModel, Matrix(X), y; wts=fweights(wts))
+
+    for gmsparse in gmsparsev
+        @test isapprox(deviance(gmsparse), deviance(gmdense))
+        @test isapprox(coef(gmsparse), coef(gmdense))
+        @test isapprox(vcov(gmsparse), vcov(gmdense))
+        @test isapprox(Matrix(modelmatrix(gmsparse; weighted=true)), modelmatrix(gmdense; weighted=true))
+    end
+
+    gmsparsev = [fit(LinearModel, X, y; wts=aweights(wts)),
+                 fit(LinearModel, X, sparse(y); wts=aweights(wts)),
+                 fit(LinearModel, Matrix(X), sparse(y); wts=aweights(wts))]
+    gmdense = fit(LinearModel, Matrix(X), y; wts=aweights(wts))
+
+    for gmsparse in gmsparsev
+        @test isapprox(deviance(gmsparse), deviance(gmdense))
+        @test isapprox(coef(gmsparse), coef(gmdense))
+        @test isapprox(vcov(gmsparse), vcov(gmdense))
+        @test isapprox(Matrix(modelmatrix(gmsparse; weighted=true)), modelmatrix(gmdense; weighted=true))
+    end
+end
+
 @testset "Predict" begin
     rng = StableRNG(123)
     X = rand(rng, 10, 2)

From 12121a31b2a3642f065f2e0e152a2de3ef8bd30a Mon Sep 17 00:00:00 2001
From: Giuseppe Ragusa <giuseppe.ragusa@gmail.com>
Date: Wed, 15 Jun 2022 19:20:30 +0200
Subject: [PATCH 3/8] WIP

---
 src/glmfit.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/glmfit.jl b/src/glmfit.jl
index 942ffc18..e9ad3f6c 100644
--- a/src/glmfit.jl
+++ b/src/glmfit.jl
@@ -666,8 +666,8 @@ when prior weights of type FrequencyWeights are specified, the sum of weights.
 nobs(obj::LinPredModel) = nobs(obj.rr)
 
 nobs(r::LmResp{V,W}) where {V,W} = oftype(sum(one(eltype(r.wts))), length(r.y))
-nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} = r.wts.sum
+nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} = isempty(r.wts) ? oftype(sum(one(eltype(r.wts))), length(r.y)) : r.wts.sum
 
-nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = r.wts.sum
+nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = isempty(r.wts) ? oftype(sum(one(eltype(r.wts))), length(r.y)) : r.wts.sum
 nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W} = oftype(sum(one(eltype(r.wts))), length(r.y))
 

From 4363ba49364a080d3e388698cf50f6a7d8f995fb Mon Sep 17 00:00:00 2001
From: Giuseppe Ragusa <giuseppe.ragusa@gmail.com>
Date: Fri, 17 Jun 2022 15:48:42 +0200
Subject: [PATCH 4/8] Taking weights seriously

---
 src/linpred.jl | 104 +++++++++++++++++++++++++------------------------
 src/lm.jl      |  23 ++++++-----
 2 files changed, 67 insertions(+), 60 deletions(-)

diff --git a/src/linpred.jl b/src/linpred.jl
index 553fb503..972489e1 100644
--- a/src/linpred.jl
+++ b/src/linpred.jl
@@ -45,24 +45,33 @@ A `LinPred` type with a dense, unpivoted QR decomposition of `X`
 - `scratchbeta`: scratch vector of length `p`, used in `linpred!` method
 - `qr`: a `QRCompactWY` object created from `X`, with optional row weights.
 """
-mutable struct DensePredQR{T<:BlasReal} <: DensePred
+mutable struct DensePredQR{T<:BlasReal, W<:AbstractVector{<:Real}} <: DensePred
     X::Matrix{T}                  # model matrix
+    Xw::Matrix{T}                 # weighted model matrix
     beta0::Vector{T}              # base coefficient vector
     delbeta::Vector{T}            # coefficient increment
     scratchbeta::Vector{T}
     qr::QRCompactWY{T}
-    function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}) where T
+    wts::W
+    function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}, wts::W) where {T,W<:AbstractWeights{<:Real}}
         n, p = size(X)
         length(beta0) == p || throw(DimensionMismatch("length(β0) ≠ size(X,2)"))
-        new{T}(X, beta0, zeros(T,p), zeros(T,p), qr(X))
+        (length(wts) == n || isempty(wts)) || throw(DimensionMismatch("Lenght of weights does not match the dimension of X"))
+        Xw = isempty(_wt) ? Matrix{T}(undef, 0, 0) : sqrt.(wts).*X
+        qrX = isempty(_wts) ? qr(X) : qr(Xw)
+        new{T,W}(X, Xw, beta0, zeros(T,p), zeros(T,p), qrX, wts)
     end
-    function DensePredQR{T}(X::Matrix{T}) where T
+    function DensePredQR{T}(X::Matrix{T}, wts::W) where {T,W}
         n, p = size(X)
-        new{T}(X, zeros(T, p), zeros(T,p), zeros(T,p), qr(X))
+        DensePredQR(X, zeros(T, p), wts)
+    end
+    function DensePredQR(X::Matrix{T}) where T
+        n, p = size(X)
+        DensePredQR{T}(X, zeros(T, p), uweights(0))
     end
 end
-DensePredQR(X::Matrix, beta0::Vector) = DensePredQR{eltype(X)}(X, beta0)
-DensePredQR(X::Matrix{T}) where T = DensePredQR{T}(X, zeros(T, size(X,2)))
+DensePredQR(X::Matrix, beta0::Vector, wts::AbstractVector) = DensePredQR{eltype(X)}(X, beta0, wts)
+DensePredQR(X::Matrix{T}, wts::AbstractVector) where T = DensePredQR{T}(X, zeros(T, size(X,2)), wts)
 convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X, zeros(T, size(X, 2)))
 
 """
@@ -92,29 +101,34 @@ A `LinPred` type with a dense Cholesky factorization of `X'X`
 - `scratchm1`: scratch Matrix{T} of the same size as `X`
 - `scratchm2`: scratch Matrix{T} os the same size as `X'X`
 """
-mutable struct DensePredChol{T<:BlasReal,C} <: DensePred
+mutable struct DensePredChol{T<:BlasReal,W<:AbstractVector{<:Real},C} <: DensePred
     X::Matrix{T}                   # model matrix
+    Xw::Matrix{T}                  # weighted model matrix
     beta0::Vector{T}               # base vector for coefficients
     delbeta::Vector{T}             # coefficient increment
     scratchbeta::Vector{T}
-    chol::C
+    wts::W
+    chol::C    
     scratchm1::Matrix{T}
     scratchm2::Matrix{T}
 end
-function DensePredChol(X::AbstractMatrix, pivot::Bool)
-    F = Hermitian(float(X'X))
+function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Real}=uweights(0))
+    Xw = isempty(wts) ? Matrix{eltype(X)}(undef, 0, 0) : sqrt.(wts).*X
+    F = isempty(wts) ? Hermitian(float(X'X)) : Hermitian(float(Xw'Xw))
     T = eltype(F)
     F = pivot ? pivoted_cholesky!(F, tol = -one(T), check = false) : cholesky!(F)
     DensePredChol(Matrix{T}(X),
+    Matrix{T}(Xw),
     zeros(T, size(X, 2)),
     zeros(T, size(X, 2)),
     zeros(T, size(X, 2)),
+    wts,
     F,
     similar(X, T),
     similar(cholfactors(F)))
 end
 
-cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot)
+cholpred(X::AbstractMatrix, pivot::Bool, wts) = DensePredChol(X, pivot, wts)
 
 cholfactors(c::Union{Cholesky,CholeskyPivoted}) = c.factors
 cholesky!(p::DensePredChol{T}) where {T<:FP} = p.chol
@@ -131,9 +145,10 @@ function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}) where T<:BlasRea
     p
 end
 
-function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal
+function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal
     ch = p.chol
-    delbeta = mul!(p.delbeta, adjoint(p.X), r)
+    Z = isempty(p.wts) ? p.X : p.Xw
+    delbeta = mul!(p.delbeta, adjoint(Z), r)
     rnk = rank(ch)
     if rnk == length(delbeta)
         ldiv!(ch, delbeta)
@@ -148,36 +163,43 @@ function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}) where T<:
     p
 end
 
-function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal
-    scr = mul!(p.scratchm1, Diagonal(wt), p.X)
-    cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(scr), p.X), :U))
+function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:Cholesky}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal
+    Z = isempty(p.wts) ? X : Xw
+    scr = mul!(p.scratchm1, Diagonal(wt), Z)
+    cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(scr), Z), :U))
     mul!(p.delbeta, transpose(scr), r)
     ldiv!(p.chol, p.delbeta)
     p
 end
 
-function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal
+function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal
+    Z = isempty(p.wts) ? p.X : p.Xw
     cf = cholfactors(p.chol)
     piv = p.chol.p
-    cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), p.X)), p.X)[piv, piv]
+    cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), Z)), Z)[piv, piv]
     cholesky!(Hermitian(cf, Symbol(p.chol.uplo)))
     ldiv!(p.chol, mul!(p.delbeta, transpose(p.scratchm1), r))
     p
 end
 
-mutable struct SparsePredChol{T,M<:SparseMatrixCSC,C} <: GLM.LinPred
+mutable struct SparsePredChol{T,W<:AbstractWeights{<:Real},M<:SparseMatrixCSC,C} <: GLM.LinPred
     X::M                           # model matrix
+    Xw::M                          # weighted model matrix
     Xt::M                          # X'
     beta0::Vector{T}               # base vector for coefficients
     delbeta::Vector{T}             # coefficient increment
     scratchbeta::Vector{T}
+    wts::W
     chol::C
     scratch::M
 end
-function SparsePredChol(X::SparseMatrixCSC{T}) where T
+function SparsePredChol(X::SparseMatrixCSC{T}, wts::AbstractVector) where T
     chol = cholesky(sparse(I, size(X, 2), size(X,2)))
+    sqrtwts = sqrt.(wts)
+    Xw = isempty(wts) ? SparseMatrixCSC(I, 0, 0) : sqrtwts.*X
     return SparsePredChol{eltype(X),typeof(X),typeof(chol)}(X,
-    X',
+    Xw,
+    isempty(wts) ? X' : Xw',
     zeros(T, size(X, 2)),
     zeros(T, size(X, 2)),
     zeros(T, size(X, 2)),
@@ -185,13 +207,14 @@ function SparsePredChol(X::SparseMatrixCSC{T}) where T
     similar(X))
 end
 
-cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X)
+cholpred(X::SparseMatrixCSC, pivot::Bool=false, wts::AbstractVector=uweights(0)) = SparsePredChol(X, wts)
 
 function delbeta!(p::SparsePredChol{T}, r::Vector{T}, wt::Vector{T}) where T
-    scr = mul!(p.scratch, Diagonal(wt), p.X)
-    XtWX = p.Xt*scr
+    Z = isempty(p.wts) ? X : Xw
+    #scr = mul!(p.scratch, Diagonal(wt), Z)
+    XtWX = p.Xt*Z
     c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L'))
-    p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r)
+    p.delbeta = c \ mul!(p.delbeta, adjoint(Z), r)
 end
 
 function delbeta!(p::SparsePredChol{T}, r::Vector{T}) where T
@@ -237,34 +260,16 @@ end
 
 modelframe(obj::LinPredModel) = obj.fr
 
-function modelmatrix(obj::LinPredModel; weighted=false)  
-    wts = weights(obj)
-    X = obj.pp.X
-    if !weighted 
-        X
-    elseif !isempty(wts)
-        wts_times_X(X, wts)
+function modelmatrix(obj::LinPredModel; weighted=false) 
+    if !weighted
+        obj.pp.X
+    elseif !isempty(weights(obj))
+        obj.pp.Xw
     else
         throw(ArgumentError("`weighted=true` allowed only for weighted models."))
     end
 end
 
-function wts_times_X(X::AbstractSparseMatrix, wts::AbstractArray)
-    Z = copy(X)
-    rows = rowvals(Z)
-    vals = nonzeros(Z)
-    m, n = size(Z)
-    for j = 1:n
-        for i in nzrange(Z, j)
-            r = rows[i]         
-            vals[i] *= sqrt(wts[r])
-        end
-    end 
-    return Z
-end
-
-wts_times_X(X::AbstractMatrix, wts::AbstractArray) = sqrt.(wts).*X
-    
 response(obj::LinPredModel) = obj.rr.y
 
 fitted(m::LinPredModel) = m.rr.mu
@@ -279,4 +284,3 @@ coef(obj::LinPredModel) = coef(obj.pp)
 dof_residual(obj::LinPredModel) = nobs(obj) - dof(obj) + 1
 
 hasintercept(m::LinPredModel) = any(i -> all(==(1), view(m.pp.X , :, i)), 1:size(m.pp.X, 2))
-    
\ No newline at end of file
diff --git a/src/lm.jl b/src/lm.jl
index 994ac895..89ba41c8 100644
--- a/src/lm.jl
+++ b/src/lm.jl
@@ -30,15 +30,8 @@ end
 
 function LmResp(y::AbstractVector{<:Real}, wts::Union{Nothing,AbstractVector{<:Real}, AbstractWeights{<:Real}}=nothing)
     # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly
-    _y = convert(Vector{float(eltype(y))}, y)
-    _wts = if wts === nothing         
-        aweights(similar(_y, 0))
-    elseif isa(wts, Vector)
-        fweights(wts)
-    else
-        wts
-    end
-    return LmResp{typeof(_y), typeof(_wts)}(zero(_y), zero(_y), _wts, _y)
+    _y = convert(Vector{float(eltype(y))}, y)    
+    return LmResp{typeof(_y), typeof(wts)}(zero(_y), zero(_y), wts, _y)
 end
 
 function updateμ!(r::LmResp{V, W}, linPr::V) where {V<:FPVector, W}
@@ -187,7 +180,17 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{<
     if isa(wts, Vector)
         Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights.", :fit)
     end
-    fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear)))
+    _wts = if wts === nothing
+        uweights(0)
+    elseif isa(wts, AbstractWeights)
+        wts
+    elseif isa(wts, AbstractVector)
+        fweights(wts)
+    else
+        throw(ArgumentError("`wts` should be an AbstractVector coercible to an AbstractWeights"))
+    end
+
+    fit!(LinearModel(LmResp(y, _wts), cholpred(X, dropcollinear, _wts)))
 end
 
 """

From ca702dcda769bf137ebcf1b86a9c0501cc75e49d Mon Sep 17 00:00:00 2001
From: Giuseppe Ragusa <giuseppe.ragusa@gmail.com>
Date: Sat, 18 Jun 2022 13:07:07 +0200
Subject: [PATCH 5/8] WIP

---
 src/glmfit.jl  |  4 ++--
 src/linpred.jl | 52 ++++++++++++++++++++++++++++++++++++++------------
 src/lm.jl      | 12 +++---------
 3 files changed, 45 insertions(+), 23 deletions(-)

diff --git a/src/glmfit.jl b/src/glmfit.jl
index e9ad3f6c..760dcb1e 100644
--- a/src/glmfit.jl
+++ b/src/glmfit.jl
@@ -481,7 +481,7 @@ function fit(::Type{M},
     d::UnivariateDistribution,
     l::Link = canonicallink(d);
     dofit::Bool = true,
-    wts::Union{AbstractWeights{<:Real}, AbstractVector{<:Real}} = aweights(similar(y, 0)),
+    wts::AbstractWeights{<:Real},
     offset::AbstractVector{<:Real}   = similar(y, 0),
     fitargs...) where {M<:AbstractGLM}
 
@@ -491,7 +491,7 @@ function fit(::Type{M},
     end
 
     rr = GlmResp(y, d, l, offset, wts)
-    res = M(rr, cholpred(X), false)
+    res = M(rr, cholpred(X, false, wts), false)
     return dofit ? fit!(res; fitargs...) : res
 end
 
diff --git a/src/linpred.jl b/src/linpred.jl
index 972489e1..b74d4b78 100644
--- a/src/linpred.jl
+++ b/src/linpred.jl
@@ -112,7 +112,7 @@ mutable struct DensePredChol{T<:BlasReal,W<:AbstractVector{<:Real},C} <: DensePr
     scratchm1::Matrix{T}
     scratchm2::Matrix{T}
 end
-function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Real}=uweights(0))
+function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Real})
     Xw = isempty(wts) ? Matrix{eltype(X)}(undef, 0, 0) : sqrt.(wts).*X
     F = isempty(wts) ? Hermitian(float(X'X)) : Hermitian(float(Xw'Xw))
     T = eltype(F)
@@ -128,7 +128,8 @@ function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Re
     similar(cholfactors(F)))
 end
 
-cholpred(X::AbstractMatrix, pivot::Bool, wts) = DensePredChol(X, pivot, wts)
+cholpred(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) = DensePredChol(X, pivot, wts)
+cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot, uweights(0))
 
 cholfactors(c::Union{Cholesky,CholeskyPivoted}) = c.factors
 cholesky!(p::DensePredChol{T}) where {T<:FP} = p.chol
@@ -163,22 +164,21 @@ function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Ve
     p
 end
 
-function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:Cholesky}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal
-    Z = isempty(p.wts) ? X : Xw
-    scr = mul!(p.scratchm1, Diagonal(wt), Z)
-    cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(scr), Z), :U))
-    mul!(p.delbeta, transpose(scr), r)
+function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:Cholesky}, r::Vector{T}) where T<:BlasReal
+    Z = isempty(p.wts) ? p.X : p.Xw
+    cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(Z), Z), :U))
+    mul!(p.delbeta, transpose(Z), r)
     ldiv!(p.chol, p.delbeta)
     p
 end
 
-function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal
+function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal   
     Z = isempty(p.wts) ? p.X : p.Xw
     cf = cholfactors(p.chol)
     piv = p.chol.p
-    cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), Z)), Z)[piv, piv]
+    cf .= mul!(p.scratchm2, adjoint(Z), Z)[piv, piv]
     cholesky!(Hermitian(cf, Symbol(p.chol.uplo)))
-    ldiv!(p.chol, mul!(p.delbeta, transpose(p.scratchm1), r))
+    ldiv!(p.chol, mul!(p.delbeta, transpose(Z), r))
     p
 end
 
@@ -243,8 +243,36 @@ function invchol(x::DensePredChol{T,<: CholeskyPivoted}) where T
     res[ipiv, ipiv]
 end
 invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), size(x.X, 2))
-## For ProbabilityWeights the variance is diferent
-vcov(x::LinPredModel) = rmul!(invchol(x.pp), dispersion(x, true))
+
+function vcov(x::LinPredModel) 
+    d = dispersion(x, true)
+    B = _covm(x.pp)
+    rmul!(B, dispersion(x, true))
+end
+
+_covm(pp::DensePredChol{T, W}) where {T,W} = invchol(pp)
+
+function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}) where {T} 
+    wts = pp.wts
+    Z = pp.scratchm1 .= pp.X.*wts
+    XtW2X = Z'Z
+    invXtWX = invchol(pp)
+    invXtWX*XtW2X*invXtWX
+end
+
+function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:CholeskyPivoted}) where {T} 
+    wts = pp.wts
+    Z = pp.scratchm1 .= pp.X.*wts
+    rnk = rank(pp.chol)
+    p = length(pp.delbeta)
+    if rnk == p
+        XtW2X = Z'Z
+    else
+        ## no idea
+    end
+    invXtWX = invchol(pp)
+    invXtWX*XtW2X*invXtWX
+end
 
 function cor(x::LinPredModel)
     Σ = vcov(x)
diff --git a/src/lm.jl b/src/lm.jl
index 89ba41c8..32c6a6d7 100644
--- a/src/lm.jl
+++ b/src/lm.jl
@@ -124,11 +124,7 @@ end
 LinearAlgebra.cholesky(x::LinearModel) = cholesky(x.pp)
 
 function StatsBase.fit!(obj::LinearModel)
-    if isempty(obj.rr.wts)
-        delbeta!(obj.pp, obj.rr.y)
-    else 
-        delbeta!(obj.pp, obj.rr.y, convert(Vector{eltype(obj.rr.y)}, obj.rr.wts))
-    end
+    delbeta!(obj.pp, obj.rr.y)    
     installbeta!(obj.pp)     
     updateμ!(obj.rr, linpred(obj.pp, zero(eltype(obj.rr.y))))
     return obj
@@ -177,17 +173,15 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{<
               "argument `dropcollinear` instead. Proceeding with positional argument value: $allowrankdeficient_dep"
         dropcollinear = allowrankdeficient_dep
     end    
-    if isa(wts, Vector)
-        Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights.", :fit)
-    end
     _wts = if wts === nothing
         uweights(0)
     elseif isa(wts, AbstractWeights)
         wts
     elseif isa(wts, AbstractVector)
+        Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights", :fit)
         fweights(wts)
     else
-        throw(ArgumentError("`wts` should be an AbstractVector coercible to an AbstractWeights"))
+        throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights"))
     end
 
     fit!(LinearModel(LmResp(y, _wts), cholpred(X, dropcollinear, _wts)))

From e2b2d1223220bd9641a10e39422314a58de67527 Mon Sep 17 00:00:00 2001
From: Giuseppe Ragusa <giuseppe.ragusa@gmail.com>
Date: Tue, 21 Jun 2022 17:55:01 +0200
Subject: [PATCH 6/8] Taking weights seriously

---
 src/glmfit.jl    |  46 +++++++++---------
 src/linpred.jl   | 118 ++++++++++++++++++++++++++++++++---------------
 src/lm.jl        |  21 +++++----
 test/runtests.jl |  31 ++++++++-----
 4 files changed, 135 insertions(+), 81 deletions(-)

diff --git a/src/glmfit.jl b/src/glmfit.jl
index 760dcb1e..277c82ad 100644
--- a/src/glmfit.jl
+++ b/src/glmfit.jl
@@ -41,8 +41,8 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVec
     end
 
     # Lengths of wts and off can be either n or 0
-    if lw != 0 && lw != n
-        throw(DimensionMismatch("wts must have length $n or length 0 but was $lw"))
+    if lw != n
+        throw(DimensionMismatch("wts must have length $n but was $lw"))
     end
     if lo != 0 && lo != n
         throw(DimensionMismatch("offset must have length $n or length 0 but was $lo"))
@@ -51,29 +51,19 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVec
     return GlmResp{V,D,L,W}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y))
 end
 
-function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::AbstractVector{<:Real})
+function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::AbstractWeights{<:Real})
     # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly
     _y   = convert(Vector{float(eltype(y))}, y)
     _off = convert(Vector{float(eltype(off))}, off)
-    _wts = if wts === nothing
-        ## This should be removed - here for allowing
-        ## passing a vector (deprecated) 
-        aweights(similar(_y, 0))
-    elseif isa(wts, AbstractWeights)   
-        wts
-    elseif isa(wts, AbstractVector)
-        ## for backward compatibility
-        fweights(wts)
-    end
     η    = similar(_y)
     μ    = similar(_y)
-    r    = GlmResp(_y, d, l, η, μ, _off, _wts)
-    initialeta!(r.eta, d, l, _y, _wts, _off)
+    r    = GlmResp(_y, d, l, η, μ, _off, wts)
+    initialeta!(r.eta, d, l, _y, wts, _off)
     updateμ!(r, r.eta)
     return r
 end
 
-function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, wts::AbstractVector{<:Real}) where {D, L}
+function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, wts::AbstractWeights{<:Real}) where {D, L}
     GlmResp(float(y), d, l, float(off), wts)
 end
 
@@ -296,7 +286,7 @@ function _fit!(m::AbstractGLM, verbose::Bool, maxiter::Integer, minstepfac::Real
     lp = r.mu
 
     # Initialize β, μ, and compute deviance
-    if start == nothing || isempty(start)
+    if start === nothing || isempty(start)
         # Compute beta update based on default response value
         # if no starting values have been passed
         delbeta!(p, wrkresp(r), r.wrkwt)
@@ -481,17 +471,25 @@ function fit(::Type{M},
     d::UnivariateDistribution,
     l::Link = canonicallink(d);
     dofit::Bool = true,
-    wts::AbstractWeights{<:Real},
-    offset::AbstractVector{<:Real}   = similar(y, 0),
+    wts::AbstractVector{<:Real} = uweights(length(y)),
+    offset::AbstractVector{<:Real} = similar(y, 0),
     fitargs...) where {M<:AbstractGLM}
-
+    println("got you")
     # Check that X and y have the same number of observations
     if size(X, 1) != size(y, 1)
         throw(DimensionMismatch("number of rows in X and y must match"))
     end
-
-    rr = GlmResp(y, d, l, offset, wts)
-    res = M(rr, cholpred(X, false, wts), false)
+    # For backward compatibility accept wts as AbstractArray and coerce them to FrequencyWeights
+    _wts = if isa(wts, AbstractWeights)
+        wts
+    elseif isa(wts, AbstractVector)
+        Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding by coercing wts to `FrequencyWeights`", :fit)
+        fweights(wts)
+    else
+        throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights"))
+    end
+    rr = GlmResp(y, d, l, offset, _wts)
+    res = M(rr, cholpred(X, false, _wts), false)
     return dofit ? fit!(res; fitargs...) : res
 end
 
@@ -500,7 +498,7 @@ fit(::Type{M},
     y::AbstractVector,
     d::UnivariateDistribution,
     l::Link=canonicallink(d); kwargs...) where {M<:AbstractGLM} =
-        fit(M, float(X), float(y), d, l; kwargs...)
+    fit(M, float(X), float(y), d, l; kwargs...)
 
 """
     glm(formula, data,
diff --git a/src/linpred.jl b/src/linpred.jl
index b74d4b78..3cd46761 100644
--- a/src/linpred.jl
+++ b/src/linpred.jl
@@ -45,7 +45,7 @@ A `LinPred` type with a dense, unpivoted QR decomposition of `X`
 - `scratchbeta`: scratch vector of length `p`, used in `linpred!` method
 - `qr`: a `QRCompactWY` object created from `X`, with optional row weights.
 """
-mutable struct DensePredQR{T<:BlasReal, W<:AbstractVector{<:Real}} <: DensePred
+mutable struct DensePredQR{T<:BlasReal, W<:AbstractWeights{<:Real}} <: DensePred
     X::Matrix{T}                  # model matrix
     Xw::Matrix{T}                 # weighted model matrix
     beta0::Vector{T}              # base coefficient vector
@@ -53,13 +53,14 @@ mutable struct DensePredQR{T<:BlasReal, W<:AbstractVector{<:Real}} <: DensePred
     scratchbeta::Vector{T}
     qr::QRCompactWY{T}
     wts::W
+    wresponse::Vector{T}
     function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}, wts::W) where {T,W<:AbstractWeights{<:Real}}
         n, p = size(X)
         length(beta0) == p || throw(DimensionMismatch("length(β0) ≠ size(X,2)"))
         (length(wts) == n || isempty(wts)) || throw(DimensionMismatch("Lenght of weights does not match the dimension of X"))
-        Xw = isempty(_wt) ? Matrix{T}(undef, 0, 0) : sqrt.(wts).*X
-        qrX = isempty(_wts) ? qr(X) : qr(Xw)
-        new{T,W}(X, Xw, beta0, zeros(T,p), zeros(T,p), qrX, wts)
+        Xw = wts isa UnitWeights ? Matrix{T}(undef, 0, 0) : sqrt.(wts).*X
+        qrX = wts isa UnitWeights ? qr(X) : qr(Xw)
+        new{T,W}(X, Xw, beta0, zeros(T,p), zeros(T,p), qrX, wts, similar(X, T, (size(X,1),) ))
     end
     function DensePredQR{T}(X::Matrix{T}, wts::W) where {T,W}
         n, p = size(X)
@@ -70,9 +71,10 @@ mutable struct DensePredQR{T<:BlasReal, W<:AbstractVector{<:Real}} <: DensePred
         DensePredQR{T}(X, zeros(T, p), uweights(0))
     end
 end
+DensePredQR{T}(X::Matrix) where T = DensePredQR{eltype(X)}(X, zeros(T, size(X, 2)), uweights(size(X,1)))
 DensePredQR(X::Matrix, beta0::Vector, wts::AbstractVector) = DensePredQR{eltype(X)}(X, beta0, wts)
 DensePredQR(X::Matrix{T}, wts::AbstractVector) where T = DensePredQR{T}(X, zeros(T, size(X,2)), wts)
-convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X, zeros(T, size(X, 2)))
+convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X)
 
 """
 delbeta!(p::LinPred, r::Vector)
@@ -99,18 +101,20 @@ A `LinPred` type with a dense Cholesky factorization of `X'X`
 - `scratchbeta`: scratch vector of length `p`, used in `linpred!` method
 - `chol`: a `Cholesky` object created from `X'X`, possibly using row weights.
 - `scratchm1`: scratch Matrix{T} of the same size as `X`
-- `scratchm2`: scratch Matrix{T} os the same size as `X'X`
+- `scratchm2`: scratch Matrix{T} of the same size as `X'X`
+- `scratchv1`: scratch Vector{T} of the same size of `y`
 """
-mutable struct DensePredChol{T<:BlasReal,W<:AbstractVector{<:Real},C} <: DensePred
+mutable struct DensePredChol{T<:BlasReal,C,W<:AbstractVector{<:Real}} <: DensePred
     X::Matrix{T}                   # model matrix
     Xw::Matrix{T}                  # weighted model matrix
     beta0::Vector{T}               # base vector for coefficients
     delbeta::Vector{T}             # coefficient increment
     scratchbeta::Vector{T}
+    chol::C
     wts::W
-    chol::C    
     scratchm1::Matrix{T}
     scratchm2::Matrix{T}
+    scratchv1::Vector{T}
 end
 function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Real})
     Xw = isempty(wts) ? Matrix{eltype(X)}(undef, 0, 0) : sqrt.(wts).*X
@@ -122,14 +126,16 @@ function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Re
     zeros(T, size(X, 2)),
     zeros(T, size(X, 2)),
     zeros(T, size(X, 2)),
-    wts,
     F,
+    wts,
     similar(X, T),
-    similar(cholfactors(F)))
+    similar(cholfactors(F)),
+    similar(X, T, (size(X,1),))
+    )
 end
 
 cholpred(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) = DensePredChol(X, pivot, wts)
-cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot, uweights(0))
+cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot, uweights(size(X,1)))
 
 cholfactors(c::Union{Cholesky,CholeskyPivoted}) = c.factors
 cholesky!(p::DensePredChol{T}) where {T<:FP} = p.chol
@@ -141,15 +147,37 @@ function cholesky(p::DensePredChol{T}) where T<:FP
 end
 cholesky!(p::DensePredQR{T}) where {T<:FP} = Cholesky{T,typeof(p.X)}(p.qr.R, 'U', 0)
 
-function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}) where T<:BlasReal
+function delbeta!(p::DensePredChol{T,<:Cholesky, <:UnitWeights}, r::Vector{T}) where T<:BlasReal
     ldiv!(p.chol, mul!(p.delbeta, transpose(p.X), r))
+end
+
+function delbeta!(p::DensePredChol{T,<:Cholesky, <:AbstractWeights}, r::Vector{T}) where T<:BlasReal
+    p.scratchv1 .= r.*sqrt(p.wts)
+    ldiv!(p.chol, mul!(p.delbeta, transpose(p.Xw), p.scratchv1))
+end
+
+function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:UnitWeights}, r::Vector{T}) where T<:BlasReal
+    ch = p.chol
+    delbeta = mul!(p.delbeta, adjoint(p.X), r)
+    rnk = rank(ch)
+    if rnk == length(delbeta)
+        ldiv!(ch, delbeta)
+    else
+        permute!(delbeta, ch.p)
+        for k=(rnk+1):length(delbeta)
+            delbeta[k] = -zero(T)
+        end
+        LAPACK.potrs!(ch.uplo, view(ch.factors, 1:rnk, 1:rnk), view(delbeta, 1:rnk))
+        invpermute!(delbeta, ch.p)
+    end
     p
 end
 
-function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal
+function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal
     ch = p.chol
-    Z = isempty(p.wts) ? p.X : p.Xw
-    delbeta = mul!(p.delbeta, adjoint(Z), r)
+    Z = p.Xw
+    p.scratchv1 .= r.*sqrt.(p.wts)
+    delbeta = mul!(p.delbeta, adjoint(p.Xw), p.scratchv1)
     rnk = rank(ch)
     if rnk == length(delbeta)
         ldiv!(ch, delbeta)
@@ -164,66 +192,83 @@ function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Ve
     p
 end
 
-function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:Cholesky}, r::Vector{T}) where T<:BlasReal
-    Z = isempty(p.wts) ? p.X : p.Xw
-    cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(Z), Z), :U))
-    mul!(p.delbeta, transpose(Z), r)
+function delbeta!(p::DensePredChol{T,<:Cholesky,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal
+    p.scratchm1 .= wt.*p.X
+    cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(p.scratchm1), p.X), :U))
+    mul!(p.delbeta, transpose(p.scratchm1), r)
     ldiv!(p.chol, p.delbeta)
     p
 end
 
-function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal   
-    Z = isempty(p.wts) ? p.X : p.Xw
+function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal
     cf = cholfactors(p.chol)
     piv = p.chol.p
-    cf .= mul!(p.scratchm2, adjoint(Z), Z)[piv, piv]
+    p.scratchm1 .= wt.*p.X
+    cf .= mul!(p.scratchm2, adjoint(p.scratchm1), p.X)[piv, piv]
     cholesky!(Hermitian(cf, Symbol(p.chol.uplo)))
-    ldiv!(p.chol, mul!(p.delbeta, transpose(Z), r))
+    ldiv!(p.chol, mul!(p.delbeta, transpose(p.scratchm1), r))
     p
 end
 
-mutable struct SparsePredChol{T,W<:AbstractWeights{<:Real},M<:SparseMatrixCSC,C} <: GLM.LinPred
+mutable struct SparsePredChol{T,M<:SparseMatrixCSC,C,W<:AbstractWeights{<:Real}} <: GLM.LinPred
     X::M                           # model matrix
     Xw::M                          # weighted model matrix
     Xt::M                          # X'
     beta0::Vector{T}               # base vector for coefficients
     delbeta::Vector{T}             # coefficient increment
     scratchbeta::Vector{T}
-    wts::W
     chol::C
     scratch::M
+    wts::W
 end
+
 function SparsePredChol(X::SparseMatrixCSC{T}, wts::AbstractVector) where T
     chol = cholesky(sparse(I, size(X, 2), size(X,2)))
     sqrtwts = sqrt.(wts)
     Xw = isempty(wts) ? SparseMatrixCSC(I, 0, 0) : sqrtwts.*X
-    return SparsePredChol{eltype(X),typeof(X),typeof(chol)}(X,
+    return SparsePredChol{eltype(X),typeof(X),typeof(chol), typeof(wts)}(X,
     Xw,
-    isempty(wts) ? X' : Xw',
+    X',
     zeros(T, size(X, 2)),
     zeros(T, size(X, 2)),
     zeros(T, size(X, 2)),
     chol,
-    similar(X))
+    similar(X),
+    wts)
 end
 
-cholpred(X::SparseMatrixCSC, pivot::Bool=false, wts::AbstractVector=uweights(0)) = SparsePredChol(X, wts)
+cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X, uweights(size(X,1)))
+cholpred(X::SparseMatrixCSC, pivot::Bool, wts::AbstractWeights) = SparsePredChol(X, wts)
 
-function delbeta!(p::SparsePredChol{T}, r::Vector{T}, wt::Vector{T}) where T
-    Z = isempty(p.wts) ? X : Xw
-    #scr = mul!(p.scratch, Diagonal(wt), Z)
-    XtWX = p.Xt*Z
+function delbeta!(p::SparsePredChol{T,M,C,<:UnitWeights}, r::Vector{T}, wt::Vector{T}) where {T,M,C}
+    scr = mul!(p.scratch, Diagonal(wt), p.X)
+    XtWX = p.Xt*scr
     c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L'))
-    p.delbeta = c \ mul!(p.delbeta, adjoint(Z), r)
+    p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r)
 end
 
-function delbeta!(p::SparsePredChol{T}, r::Vector{T}) where T
+function delbeta!(p::SparsePredChol{T,M,C,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where {T,M,C}
+    scr = mul!(p.scratch, Diagonal(wt.*p.wts), p.X)
+    XtWX = p.Xt*scr
+    c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L'))
+    p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r)
+end
+
+function delbeta!(p::SparsePredChol{T,M,C,<:UnitWeights}, r::Vector{T}) where {T,M,C}
     scr = p.scratch = p.X
     XtWX = p.Xt*scr
     c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L'))
     p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r)
 end
 
+function delbeta!(p::SparsePredChol{T,M,C,<:AbstractWeights}, r::Vector{T}) where {T,M,C}
+    scr = p.scratch .= p.X.*p.wts
+    XtWX = p.Xt*scr
+    @show XtWX
+    c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L'))
+    p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r)
+end
+
 LinearAlgebra.cholesky(p::SparsePredChol{T}) where {T} = copy(p.chol)
 LinearAlgebra.cholesky!(p::SparsePredChol{T}) where {T} = p.chol
 
@@ -242,6 +287,7 @@ function invchol(x::DensePredChol{T,<: CholeskyPivoted}) where T
     ipiv = invperm(ch.p)
     res[ipiv, ipiv]
 end
+
 invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), size(x.X, 2))
 
 function vcov(x::LinPredModel) 
@@ -250,7 +296,7 @@ function vcov(x::LinPredModel)
     rmul!(B, dispersion(x, true))
 end
 
-_covm(pp::DensePredChol{T, W}) where {T,W} = invchol(pp)
+_covm(pp::LinPred) = invchol(pp)
 
 function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}) where {T} 
     wts = pp.wts
diff --git a/src/lm.jl b/src/lm.jl
index 32c6a6d7..9d742958 100644
--- a/src/lm.jl
+++ b/src/lm.jl
@@ -12,7 +12,7 @@ Encapsulates the response for a linear model
 
 Either or both `offset` and `wts` may be of length 0
 """
-mutable struct LmResp{V<:FPVector, W<:Union{AbstractWeights{<:Real}, AbstractVector{<:Real}}} <: ModResp  # response in a linear model
+mutable struct LmResp{V<:FPVector, W<:AbstractWeights{<:Real}} <: ModResp  # response in a linear model
     mu::V                                  # mean response
     offset::V                              # offset added to linear predictor (may have length 0)
     wts::W                                 # prior weights (may have length 0)
@@ -28,9 +28,9 @@ mutable struct LmResp{V<:FPVector, W<:Union{AbstractWeights{<:Real}, AbstractVec
     end
 end
 
-function LmResp(y::AbstractVector{<:Real}, wts::Union{Nothing,AbstractVector{<:Real}, AbstractWeights{<:Real}}=nothing)
+function LmResp(y::AbstractVector{<:Real}, wts::AbstractWeights{<:Real})
     # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly
-    _y = convert(Vector{float(eltype(y))}, y)    
+    _y = convert(Vector{float(eltype(y))}, y)
     return LmResp{typeof(_y), typeof(wts)}(zero(_y), zero(_y), wts, _y)
 end
 
@@ -124,8 +124,8 @@ end
 LinearAlgebra.cholesky(x::LinearModel) = cholesky(x.pp)
 
 function StatsBase.fit!(obj::LinearModel)
-    delbeta!(obj.pp, obj.rr.y)    
-    installbeta!(obj.pp)     
+    delbeta!(obj.pp, obj.rr.y)
+    installbeta!(obj.pp)
     updateμ!(obj.rr, linpred(obj.pp, zero(eltype(obj.rr.y))))
     return obj
 end
@@ -166,19 +166,20 @@ $FIT_LM_DOC
 """
 function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{<:Real},
              allowrankdeficient_dep::Union{Bool,Nothing}=nothing;
-             wts::AbstractVector{<:Real}=similar(y, 0),
+             wts::AbstractVector{<:Real}=uweights(length(y)),
              dropcollinear::Bool=true)
     if allowrankdeficient_dep !== nothing
         @warn "Positional argument `allowrankdeficient` is deprecated, use keyword " *
               "argument `dropcollinear` instead. Proceeding with positional argument value: $allowrankdeficient_dep"
         dropcollinear = allowrankdeficient_dep
     end    
-    _wts = if wts === nothing
-        uweights(0)
-    elseif isa(wts, AbstractWeights)
+    # For backward compatibility accept wts as AbstractArray and coerce them to FrequencyWeights
+    _wts = if isa(wts, AbstractWeights)
         wts
     elseif isa(wts, AbstractVector)
-        Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights", :fit)
+        Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " * 
+                     "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * 
+                     "by coercing wts to `FrequencyWeights`", :fit)
         fweights(wts)
     else
         throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights"))
diff --git a/test/runtests.jl b/test/runtests.jl
index 90bd33a0..45663c74 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -152,9 +152,8 @@ end
     @test all(isnan, hcat(coeftable(m2p).cols[2:end]...)[7,:])
 
     m2p_dep_pos = fit(LinearModel, Xmissingcell, ymissingcell, true)
-    @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " * "argument `dropcollinear` instead. Proceeding with positional argument value: true")  (:warn, "Passing weights as vector is deprecated in favor of explicitely using " * "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights.") fit(LinearModel, Xmissingcell, ymissingcell, true)
-    # @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " *
-    #             "argument `dropcollinear` instead. Proceeding with positional argument value: true") fit(LinearModel, Xmissingcell, ymissingcell, true)
+    @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " *
+                "argument `dropcollinear` instead. Proceeding with positional argument value: true") fit(LinearModel, Xmissingcell, ymissingcell, true)
     @test isa(m2p_dep_pos.pp.chol, CholeskyPivoted)
     @test rank(m2p_dep_pos.pp.chol) == rank(m2p.pp.chol)
     @test isapprox(deviance(m2p_dep_pos), deviance(m2p))
@@ -167,6 +166,16 @@ end
     @test isapprox(coef(m2p_dep_pos_kw), coef(m2p))
 end
 
+@testset "Passing wts (depwarn)" begin
+    df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3], wts = [3,3,3])
+    @test_logs (:warn, "Passing weights as vector is deprecated in favor of explicitely using " * 
+                       "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * 
+                       "by coercing wts to `FrequencyWeights`") lm(@formula(y~x), df; wts=wts)
+    @test_logs (:warn, "Passing weights as vector is deprecated in favor of explicitely using " * 
+                       "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * 
+                       "by coercing wts to `FrequencyWeights`") glm(@formula(y~x), Normal(), IdentityLink(), df; wts=wts)
+end
+
 @testset "saturated linear model" begin
     df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3])
     model = lm(@formula(y ~ x), df)
@@ -1206,14 +1215,14 @@ end
         glm4 = glm(view(x, :, :), view(y, :), Binomial())
         @test coef(glm1) == coef(glm2) == coef(glm3) == coef(glm4)
 
-        glm5 = glm(x, y, Binomial(), wts=w)
-        glm6 = glm(x, view(y, :), Binomial(), wts=w)
-        glm7 = glm(view(x, :, :), y, Binomial(), wts=w)
-        glm8 = glm(view(x, :, :), view(y, :), Binomial(), wts=w)
-        glm9 = glm(x, y, Binomial(), wts=view(w, :))
-        glm10 = glm(x, view(y, :), Binomial(), wts=view(w, :))
-        glm11 = glm(view(x, :, :), y, Binomial(), wts=view(w, :))
-        glm12 = glm(view(x, :, :), view(y, :), Binomial(), wts=view(w, :))
+        glm5 = glm(x, y, Binomial(), wts=fweights(w))
+        glm6 = glm(x, view(y, :), Binomial(), wts=fweights(w))
+        glm7 = glm(view(x, :, :), y, Binomial(), wts=fweights(w))
+        glm8 = glm(view(x, :, :), view(y, :), Binomial(), wts=fweights(w))
+        glm9 = glm(x, y, Binomial(), wts=fweights(view(w, :)))
+        glm10 = glm(x, view(y, :), Binomial(), wts=fweights(view(w, :)))
+        glm11 = glm(view(x, :, :), y, Binomial(), wts=fweights(view(w, :)))
+        glm12 = glm(view(x, :, :), view(y, :), Binomial(), wts=fweights(view(w, :)))
         @test coef(glm5) == coef(glm6) == coef(glm7) == coef(glm8) == coef(glm9) == coef(glm10) ==
             coef(glm11) == coef(glm12)
     end

From 84cd9901a6b71fce43446f504624c5727821226f Mon Sep 17 00:00:00 2001
From: Giuseppe Ragusa <giuseppe.ragusa@gmail.com>
Date: Wed, 22 Jun 2022 12:09:12 +0200
Subject: [PATCH 7/8] Add depwarn for passing wts with Vector

---
 src/glmfit.jl | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/glmfit.jl b/src/glmfit.jl
index 277c82ad..909d3210 100644
--- a/src/glmfit.jl
+++ b/src/glmfit.jl
@@ -474,7 +474,6 @@ function fit(::Type{M},
     wts::AbstractVector{<:Real} = uweights(length(y)),
     offset::AbstractVector{<:Real} = similar(y, 0),
     fitargs...) where {M<:AbstractGLM}
-    println("got you")
     # Check that X and y have the same number of observations
     if size(X, 1) != size(y, 1)
         throw(DimensionMismatch("number of rows in X and y must match"))
@@ -483,7 +482,9 @@ function fit(::Type{M},
     _wts = if isa(wts, AbstractWeights)
         wts
     elseif isa(wts, AbstractVector)
-        Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding by coercing wts to `FrequencyWeights`", :fit)
+        Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " * 
+                     "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * 
+                     "by coercing wts to `FrequencyWeights`", :fit)
         fweights(wts)
     else
         throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights"))

From cbc329f9e0e865b18f3a0a35baad2df7aed2a48f Mon Sep 17 00:00:00 2001
From: Giuseppe Ragusa <giuseppe.ragusa@gmail.com>
Date: Wed, 22 Jun 2022 13:00:39 +0200
Subject: [PATCH 8/8] Cosmettic changes

---
 src/glmfit.jl  | 17 ++++++++---------
 src/linpred.jl | 51 +++++++++++++++++++++++++-------------------------
 src/lm.jl      | 21 ++++++++++-----------
 3 files changed, 43 insertions(+), 46 deletions(-)

diff --git a/src/glmfit.jl b/src/glmfit.jl
index 909d3210..94058ef8 100644
--- a/src/glmfit.jl
+++ b/src/glmfit.jl
@@ -17,7 +17,7 @@ struct GlmResp{V<:FPVector, D<:UnivariateDistribution,L<:Link,W<:AbstractWeights
     mu::V
     "`offset:` offset added to `Xβ` to form `eta`.  Can be of length 0"
     offset::V
-    "`wts:` prior case weights.  Can be of length 0."
+    "`wts`: prior case weights.  Can be of length 0."
     wts::W
     "`wrkwt`: working case weights for the Iteratively Reweighted Least Squares (IRLS) algorithm"
     wrkwt::V
@@ -372,7 +372,7 @@ function StatsBase.fit!(m::AbstractGLM;
     if haskey(kwargs, :tol)
         Base.depwarn("`tol` argument is deprecated, use `atol` and `rtol` instead", :fit!)
         rtol = kwargs[:tol]
-    end    
+    end
 
     _fit!(m, verbose, maxiter, minstepfac, atol, rtol, start)
 end
@@ -438,9 +438,9 @@ const FIT_GLM_DOC = """
 
     # Keyword Arguments
     - `dofit::Bool=true`: Determines whether model will be fit
-    - `wts::AbstractWeights=aweights(similar(y,0))`: Weights of observations. 
-      Allowed weights are `AnalyticalWeights`, `FrequencyWeights`, or `ProbabilityWeights`.        
-      If a vector is passed (deprecated) it is coerced to FrequencyWeights. 
+    - `wts::AbstractWeights=aweights(similar(y,0))`: Weights of observations.
+      Allowed weights are `AnalyticalWeights`, `FrequencyWeights`, or `ProbabilityWeights`.
+      If a vector is passed (deprecated) it is coerced to FrequencyWeights.
       Can be length 0 to indicate no weighting (default).
     - `offset::Vector=similar(y,0)`: offset added to `Xβ` to form `eta`.  Can be of
       length 0
@@ -482,8 +482,8 @@ function fit(::Type{M},
     _wts = if isa(wts, AbstractWeights)
         wts
     elseif isa(wts, AbstractVector)
-        Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " * 
-                     "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * 
+        Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " *
+                     "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " *
                      "by coercing wts to `FrequencyWeights`", :fit)
         fweights(wts)
     else
@@ -499,7 +499,7 @@ fit(::Type{M},
     y::AbstractVector,
     d::UnivariateDistribution,
     l::Link=canonicallink(d); kwargs...) where {M<:AbstractGLM} =
-    fit(M, float(X), float(y), d, l; kwargs...)
+        fit(M, float(X), float(y), d, l; kwargs...)
 
 """
     glm(formula, data,
@@ -669,4 +669,3 @@ nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} = isempty(r.wts) ? oftype(sum
 
 nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = isempty(r.wts) ? oftype(sum(one(eltype(r.wts))), length(r.y)) : r.wts.sum
 nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W} = oftype(sum(one(eltype(r.wts))), length(r.y))
-
diff --git a/src/linpred.jl b/src/linpred.jl
index 3cd46761..441f1211 100644
--- a/src/linpred.jl
+++ b/src/linpred.jl
@@ -1,5 +1,5 @@
 """
-linpred!(out, p::LinPred, f::Real=1.0)
+    linpred!(out, p::LinPred, f::Real=1.0)
 
 Overwrite `out` with the linear predictor from `p` with factor `f`
 
@@ -11,14 +11,14 @@ function linpred!(out, p::LinPred, f::Real=1.)
 end
 
 """
-linpred(p::LinPred, f::Real=1.0)
+    linpred(p::LinPred, f::Real=1.0)
 
 Return the linear predictor `p.X * (p.beta0 .+ f * p.delbeta)`
 """
 linpred(p::LinPred, f::Real=1.) = linpred!(Vector{eltype(p.X)}(undef, size(p.X, 1)), p, f)
 
 """
-installbeta!(p::LinPred, f::Real=1.0)
+    installbeta!(p::LinPred, f::Real=1.0)
 
 Install `pbeta0 .+= f * p.delbeta` and zero out `p.delbeta`.  Return the updated `p.beta0`.
 """
@@ -33,7 +33,7 @@ function installbeta!(p::LinPred, f::Real=1.)
 end
 
 """
-DensePredQR
+    DensePredQR
 
 A `LinPred` type with a dense, unpivoted QR decomposition of `X`
 
@@ -122,16 +122,15 @@ function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Re
     T = eltype(F)
     F = pivot ? pivoted_cholesky!(F, tol = -one(T), check = false) : cholesky!(F)
     DensePredChol(Matrix{T}(X),
-    Matrix{T}(Xw),
-    zeros(T, size(X, 2)),
-    zeros(T, size(X, 2)),
-    zeros(T, size(X, 2)),
-    F,
-    wts,
-    similar(X, T),
-    similar(cholfactors(F)),
-    similar(X, T, (size(X,1),))
-    )
+        Matrix{T}(Xw),
+        zeros(T, size(X, 2)),
+        zeros(T, size(X, 2)),
+        zeros(T, size(X, 2)),
+        F,
+        wts,
+        similar(X, T),
+        similar(cholfactors(F)),
+        similar(X, T, (size(X,1),)))
 end
 
 cholpred(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) = DensePredChol(X, pivot, wts)
@@ -227,14 +226,14 @@ function SparsePredChol(X::SparseMatrixCSC{T}, wts::AbstractVector) where T
     sqrtwts = sqrt.(wts)
     Xw = isempty(wts) ? SparseMatrixCSC(I, 0, 0) : sqrtwts.*X
     return SparsePredChol{eltype(X),typeof(X),typeof(chol), typeof(wts)}(X,
-    Xw,
-    X',
-    zeros(T, size(X, 2)),
-    zeros(T, size(X, 2)),
-    zeros(T, size(X, 2)),
-    chol,
-    similar(X),
-    wts)
+        Xw,
+        X',
+        zeros(T, size(X, 2)),
+        zeros(T, size(X, 2)),
+        zeros(T, size(X, 2)),
+        chol,
+        similar(X),
+        wts)
 end
 
 cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X, uweights(size(X,1)))
@@ -290,7 +289,7 @@ end
 
 invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), size(x.X, 2))
 
-function vcov(x::LinPredModel) 
+function vcov(x::LinPredModel)
     d = dispersion(x, true)
     B = _covm(x.pp)
     rmul!(B, dispersion(x, true))
@@ -298,7 +297,7 @@ end
 
 _covm(pp::LinPred) = invchol(pp)
 
-function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}) where {T} 
+function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}) where {T}
     wts = pp.wts
     Z = pp.scratchm1 .= pp.X.*wts
     XtW2X = Z'Z
@@ -306,7 +305,7 @@ function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}) where {T}
     invXtWX*XtW2X*invXtWX
 end
 
-function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:CholeskyPivoted}) where {T} 
+function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:CholeskyPivoted}) where {T}
     wts = pp.wts
     Z = pp.scratchm1 .= pp.X.*wts
     rnk = rank(pp.chol)
@@ -334,7 +333,7 @@ end
 
 modelframe(obj::LinPredModel) = obj.fr
 
-function modelmatrix(obj::LinPredModel; weighted=false) 
+function modelmatrix(obj::LinPredModel; weighted=false)
     if !weighted
         obj.pp.X
     elseif !isempty(weights(obj))
diff --git a/src/lm.jl b/src/lm.jl
index b49d6d8d..42a419c5 100644
--- a/src/lm.jl
+++ b/src/lm.jl
@@ -67,14 +67,14 @@ end
 
 function nullloglikelihood(r::LmResp)
     n = isempty(r.wts) ? length(r.y) : sum(r.wts)
-    -n/2 * (log(2π * nulldeviance(r)/n) + 1) 
+    -n/2 * (log(2π * nulldeviance(r)/n) + 1)
 end
 
-function residuals(r::LmResp; weighted=false) 
+function residuals(r::LmResp; weighted=false)
     wts = weights(r)
     res = r.y - r.mu
-    if !weighted 
-        res 
+    if !weighted
+        res
     elseif !isempty(wts)
         sqrt.(wts).*res
     else
@@ -118,14 +118,14 @@ const FIT_LM_DOC = """
     values of the dependent variable.
 
     The keyword argument `wts` can be an `AbstractWeights` specifying frequency weights for observations.
-    Weights allowed are: 
+    Weights allowed are:
     - `AnalyticaWeights`: describe a non-random relative importance (usually between 0 and 1)
     for each observation.
     - `FrequencyWeights`: describe the number of times (or frequency) each observation was observed.
     - `ProbabilityWeights`: represent the inverse of the sampling probability for each observation,
     providing a correction mechanism for under- or over-sampling certain population groups
-    These weights gives equal point estimates but different standard errors. 
-    If a vector is passed (deprecated), it is coerced to `FrequencyWeights`. 
+    These weights gives equal point estimates but different standard errors.
+    If a vector is passed (deprecated), it is coerced to `FrequencyWeights`.
 
     `dropcollinear` controls whether or not `lm` accepts a model matrix which
     is less-than-full rank. If `true` (the default), only the first of each set of
@@ -151,19 +151,18 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{<
         @warn "Positional argument `allowrankdeficient` is deprecated, use keyword " *
               "argument `dropcollinear` instead. Proceeding with positional argument value: $allowrankdeficient_dep"
         dropcollinear = allowrankdeficient_dep
-    end    
+    end
     # For backward compatibility accept wts as AbstractArray and coerce them to FrequencyWeights
     _wts = if isa(wts, AbstractWeights)
         wts
     elseif isa(wts, AbstractVector)
-        Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " * 
-                     "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * 
+        Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " *
+                     "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " *
                      "by coercing wts to `FrequencyWeights`", :fit)
         fweights(wts)
     else
         throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights"))
     end
-
     fit!(LinearModel(LmResp(y, _wts), cholpred(X, dropcollinear, _wts)))
 end