From 088a2690c742ee392c431d2d33658d74c0e86f42 Mon Sep 17 00:00:00 2001 From: Daniel Winkler Date: Thu, 27 May 2021 13:39:06 +0200 Subject: [PATCH 1/7] Fixed linear model with perfectly collinear rhs variables and weights --- src/linpred.jl | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/linpred.jl b/src/linpred.jl index c498c7cf..164ae239 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -157,11 +157,26 @@ function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}, wt::Vector{T}) w end function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal - cf = cholfactors(p.chol) - piv = p.chol.p - cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), p.X)), p.X)[piv, piv] - cholesky!(Hermitian(cf, Symbol(p.chol.uplo))) - ldiv!(p.chol, mul!(p.delbeta, transpose(p.scratchm1), r)) + ch = p.chol + rnk = rank(ch) + if rnk == length(ch.p) + scr = mul!(p.scratchm1, Diagonal(wt), p.X) + cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(scr), p.X), :U)) + mul!(p.delbeta, transpose(scr), r) + ldiv!(p.chol, p.delbeta) + else + xc = p.X + scr = mul!(p.scratchm1, Diagonal(wt), xc) + scr2 = mul!(p.scratchm2, adjoint(xc), scr) + ch2 = cholesky!(scr2, Val(true), tol = -one(T), check = false) + delbeta = mul!(p.delbeta, adjoint(scr), r) + permute!(delbeta, ch.p) + for k=(rnk+1):length(delbeta) + delbeta[k] = -zero(T) + end + LAPACK.potrs!(ch2.uplo, view(ch2.factors, 1:rnk, 1:rnk), view(delbeta, 1:rnk)) + invpermute!(delbeta, ch.p) + end p end From 3acce1ef3ecf7a71fae6cc6423d86645a4a2fb12 Mon Sep 17 00:00:00 2001 From: Daniel Winkler Date: Fri, 4 Feb 2022 13:57:41 +0100 Subject: [PATCH 2/7] added test --- Project.toml | 2 ++ test/runtests.jl | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/Project.toml b/Project.toml index ea73e8f5..91e497e0 100644 --- a/Project.toml +++ b/Project.toml @@ -3,9 +3,11 @@ uuid = "38e38edf-8417-5370-95a0-9cbb8c7f171a" version = "1.6.0" [deps] +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" diff --git a/test/runtests.jl b/test/runtests.jl index 47c1682d..00faa2b2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -141,6 +141,24 @@ end @test isapprox(coef(m2p_dep_pos_kw), coef(m2p)) end +@testset "collinearity and weights" begin + rng = StableRNG(1234321) + x1 = randn(100) + x1_2 = 3 * x1 + x2 = 10 * randn(100) + x2_2 = -2.4 * x2 + y = 1 .+ randn() * x1 + randn() * x2 + 2 * randn(100) + df = DataFrame(y = y, x1 = x1, x2 = x1_2, x3 = x2, x4 = x2_2, weights = repeat([1, 0.5],50)) + f = @formula(y ~ x1 + x2 + x3 + x4) + lm_model = lm(f, df, wts = df.weights)#, dropcollinear = true) + X = [ones(length(y)) x1_2 x2_2] + W = Diagonal(df.weights) + coef_naive = (X'W*X)\X'W*y + @test lm_model.model.pp.chol isa CholeskyPivoted + @test rank(lm_model.model.pp.chol) == 3 + @test isapprox(filter(!=(0.0), coef(lm_model)), coef_naive) +end + dobson = DataFrame(Counts = [18.,17,15,20,10,20,25,13,12], Outcome = categorical(repeat(string.('A':'C'), outer = 3)), Treatment = categorical(repeat(string.('a':'c'), inner = 3))) From 519f3cd8813ad6c6595b3cbc74c71f288bd6bf78 Mon Sep 17 00:00:00 2001 From: Daniel Winkler Date: Fri, 4 Feb 2022 14:08:05 +0100 Subject: [PATCH 3/7] fixed full-rank case --- src/linpred.jl | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/src/linpred.jl b/src/linpred.jl index 1683eb74..755afd8a 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -1,5 +1,5 @@ """ - linpred!(out, p::LinPred, f::Real=1.0) +linpred!(out, p::LinPred, f::Real=1.0) Overwrite `out` with the linear predictor from `p` with factor `f` @@ -11,14 +11,14 @@ function linpred!(out, p::LinPred, f::Real=1.) end """ - linpred(p::LinPred, f::Read=1.0) +linpred(p::LinPred, f::Read=1.0) Return the linear predictor `p.X * (p.beta0 .+ f * p.delbeta)` """ linpred(p::LinPred, f::Real=1.) = linpred!(Vector{eltype(p.X)}(undef, size(p.X, 1)), p, f) """ - installbeta!(p::LinPred, f::Real=1.0) +installbeta!(p::LinPred, f::Real=1.0) Install `pbeta0 .+= f * p.delbeta` and zero out `p.delbeta`. Return the updated `p.beta0`. """ @@ -33,7 +33,7 @@ function installbeta!(p::LinPred, f::Real=1.) end """ - DensePredQR +DensePredQR A `LinPred` type with a dense, unpivoted QR decomposition of `X` @@ -66,7 +66,7 @@ DensePredQR(X::Matrix{T}) where T = DensePredQR{T}(X, zeros(T, size(X,2))) convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X, zeros(T, size(X, 2))) """ - delbeta!(p::LinPred, r::Vector) +delbeta!(p::LinPred, r::Vector) Evaluate and return `p.delbeta` the increment to the coefficient vector from residual `r` """ @@ -78,7 +78,7 @@ function delbeta!(p::DensePredQR{T}, r::Vector{T}) where T<:BlasReal end """ - DensePredChol{T} +DensePredChol{T} A `LinPred` type with a dense Cholesky factorization of `X'X` @@ -106,12 +106,12 @@ function DensePredChol(X::StridedMatrix, pivot::Bool) T = eltype(F) F = pivot ? cholesky!(F, Val(true), tol = -one(T), check = false) : cholesky!(F) DensePredChol(AbstractMatrix{T}(X), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - F, - similar(X, T), - similar(cholfactors(F))) + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + F, + similar(X, T), + similar(cholfactors(F))) end cholpred(X::StridedMatrix, pivot::Bool=false) = DensePredChol(X, pivot) @@ -160,10 +160,11 @@ function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}, wt::Vecto ch = p.chol rnk = rank(ch) if rnk == length(ch.p) - scr = mul!(p.scratchm1, Diagonal(wt), p.X) - cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(scr), p.X), :U)) - mul!(p.delbeta, transpose(scr), r) - ldiv!(p.chol, p.delbeta) + cf = cholfactors(ch) + piv = ch.p + cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), p.X)), p.X)[piv, piv] + cholesky!(Hermitian(cf, Symbol(ch.uplo))) + ldiv!(ch, mul!(p.delbeta, transpose(p.scratchm1), r)) else xc = p.X scr = mul!(p.scratchm1, Diagonal(wt), xc) @@ -192,12 +193,12 @@ end function SparsePredChol(X::SparseMatrixCSC{T}) where T chol = cholesky(sparse(I, size(X, 2), size(X,2))) return SparsePredChol{eltype(X),typeof(X),typeof(chol)}(X, - X', - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - chol, - similar(X)) + X', + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + chol, + similar(X)) end cholpred(X::SparseMatrixCSC) = SparsePredChol(X) @@ -252,8 +253,8 @@ StatsModels.formula(obj::LinPredModel) = modelframe(obj).formula residuals(obj::LinPredModel) = residuals(obj.rr) """ - nobs(obj::LinearModel) - nobs(obj::GLM) +nobs(obj::LinearModel) +nobs(obj::GLM) For linear and generalized linear models, returns the number of rows, or, when prior weights are specified, the sum of weights. From 20b59d855370d26a2ccbd78c83b0dffd5eaba0aa Mon Sep 17 00:00:00 2001 From: Daniel Winkler Date: Fri, 4 Feb 2022 14:13:57 +0100 Subject: [PATCH 4/7] Remove unnecessary deps --- Project.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/Project.toml b/Project.toml index c1821481..d7a12438 100644 --- a/Project.toml +++ b/Project.toml @@ -3,11 +3,9 @@ uuid = "38e38edf-8417-5370-95a0-9cbb8c7f171a" version = "1.6.1" [deps] -DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" -RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" From 5585306fdc2ae26ba2bbca3d3de43565d4ec60ce Mon Sep 17 00:00:00 2001 From: Daniel Winkler Date: Fri, 4 Feb 2022 14:22:27 +0100 Subject: [PATCH 5/7] Revert "fixed full-rank case" This reverts commit 519f3cd8813ad6c6595b3cbc74c71f288bd6bf78. --- src/linpred.jl | 49 ++++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/src/linpred.jl b/src/linpred.jl index 218e452e..dcb104b4 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -1,5 +1,5 @@ """ -linpred!(out, p::LinPred, f::Real=1.0) + linpred!(out, p::LinPred, f::Real=1.0) Overwrite `out` with the linear predictor from `p` with factor `f` @@ -11,14 +11,14 @@ function linpred!(out, p::LinPred, f::Real=1.) end """ -linpred(p::LinPred, f::Read=1.0) + linpred(p::LinPred, f::Read=1.0) Return the linear predictor `p.X * (p.beta0 .+ f * p.delbeta)` """ linpred(p::LinPred, f::Real=1.) = linpred!(Vector{eltype(p.X)}(undef, size(p.X, 1)), p, f) """ -installbeta!(p::LinPred, f::Real=1.0) + installbeta!(p::LinPred, f::Real=1.0) Install `pbeta0 .+= f * p.delbeta` and zero out `p.delbeta`. Return the updated `p.beta0`. """ @@ -33,7 +33,7 @@ function installbeta!(p::LinPred, f::Real=1.) end """ -DensePredQR + DensePredQR A `LinPred` type with a dense, unpivoted QR decomposition of `X` @@ -66,7 +66,7 @@ DensePredQR(X::Matrix{T}) where T = DensePredQR{T}(X, zeros(T, size(X,2))) convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X, zeros(T, size(X, 2))) """ -delbeta!(p::LinPred, r::Vector) + delbeta!(p::LinPred, r::Vector) Evaluate and return `p.delbeta` the increment to the coefficient vector from residual `r` """ @@ -78,7 +78,7 @@ function delbeta!(p::DensePredQR{T}, r::Vector{T}) where T<:BlasReal end """ -DensePredChol{T} + DensePredChol{T} A `LinPred` type with a dense Cholesky factorization of `X'X` @@ -106,12 +106,12 @@ function DensePredChol(X::StridedMatrix, pivot::Bool) T = eltype(F) F = pivot ? pivoted_cholesky!(F, tol = -one(T), check = false) : cholesky!(F) DensePredChol(AbstractMatrix{T}(X), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - F, - similar(X, T), - similar(cholfactors(F))) + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + F, + similar(X, T), + similar(cholfactors(F))) end cholpred(X::StridedMatrix, pivot::Bool=false) = DensePredChol(X, pivot) @@ -160,11 +160,10 @@ function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}, wt::Vecto ch = p.chol rnk = rank(ch) if rnk == length(ch.p) - cf = cholfactors(ch) - piv = ch.p - cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), p.X)), p.X)[piv, piv] - cholesky!(Hermitian(cf, Symbol(ch.uplo))) - ldiv!(ch, mul!(p.delbeta, transpose(p.scratchm1), r)) + scr = mul!(p.scratchm1, Diagonal(wt), p.X) + cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(scr), p.X), :U)) + mul!(p.delbeta, transpose(scr), r) + ldiv!(p.chol, p.delbeta) else xc = p.X scr = mul!(p.scratchm1, Diagonal(wt), xc) @@ -193,12 +192,12 @@ end function SparsePredChol(X::SparseMatrixCSC{T}) where T chol = cholesky(sparse(I, size(X, 2), size(X,2))) return SparsePredChol{eltype(X),typeof(X),typeof(chol)}(X, - X', - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - chol, - similar(X)) + X', + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + chol, + similar(X)) end cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X) @@ -260,8 +259,8 @@ StatsModels.formula(obj::LinPredModel) = modelframe(obj).formula residuals(obj::LinPredModel) = residuals(obj.rr) """ -nobs(obj::LinearModel) -nobs(obj::GLM) + nobs(obj::LinearModel) + nobs(obj::GLM) For linear and generalized linear models, returns the number of rows, or, when prior weights are specified, the sum of weights. From eb0031f3cfb1f8845c8aeaf3e228968636fdeeb0 Mon Sep 17 00:00:00 2001 From: Daniel Winkler Date: Fri, 4 Feb 2022 14:23:24 +0100 Subject: [PATCH 6/7] fixed full-rank case --- src/linpred.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/linpred.jl b/src/linpred.jl index dcb104b4..0bf5c635 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -160,10 +160,11 @@ function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}, wt::Vecto ch = p.chol rnk = rank(ch) if rnk == length(ch.p) - scr = mul!(p.scratchm1, Diagonal(wt), p.X) - cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(scr), p.X), :U)) - mul!(p.delbeta, transpose(scr), r) - ldiv!(p.chol, p.delbeta) + cf = cholfactors(ch) + piv = ch.p + cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), p.X)), p.X)[piv, piv] + cholesky!(Hermitian(cf, Symbol(ch.uplo))) + ldiv!(ch, mul!(p.delbeta, transpose(p.scratchm1), r)) else xc = p.X scr = mul!(p.scratchm1, Diagonal(wt), xc) From b8eb78992acd3af60abab2a64a2c2259af850c29 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Sun, 28 Aug 2022 19:33:52 +0200 Subject: [PATCH 7/7] Remove comment --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index c01f2ca6..bdc97937 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -151,7 +151,7 @@ end y = 1 .+ randn() * x1 + randn() * x2 + 2 * randn(100) df = DataFrame(y = y, x1 = x1, x2 = x1_2, x3 = x2, x4 = x2_2, weights = repeat([1, 0.5],50)) f = @formula(y ~ x1 + x2 + x3 + x4) - lm_model = lm(f, df, wts = df.weights)#, dropcollinear = true) + lm_model = lm(f, df, wts = df.weights) X = [ones(length(y)) x1_2 x2_2] W = Diagonal(df.weights) coef_naive = (X'W*X)\X'W*y