diff --git a/src/linpred.jl b/src/linpred.jl index 4274f575..01d2b918 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -157,11 +157,27 @@ function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}, wt::Vector{T}) w end function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal - cf = cholfactors(p.chol) - piv = p.chol.p - cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), p.X)), p.X)[piv, piv] - cholesky!(Hermitian(cf, Symbol(p.chol.uplo))) - ldiv!(p.chol, mul!(p.delbeta, transpose(p.scratchm1), r)) + ch = p.chol + rnk = rank(ch) + if rnk == length(ch.p) + cf = cholfactors(ch) + piv = ch.p + cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), p.X)), p.X)[piv, piv] + cholesky!(Hermitian(cf, Symbol(ch.uplo))) + ldiv!(ch, mul!(p.delbeta, transpose(p.scratchm1), r)) + else + xc = p.X + scr = mul!(p.scratchm1, Diagonal(wt), xc) + scr2 = mul!(p.scratchm2, adjoint(xc), scr) + ch2 = cholesky!(scr2, Val(true), tol = -one(T), check = false) + delbeta = mul!(p.delbeta, adjoint(scr), r) + permute!(delbeta, ch.p) + for k=(rnk+1):length(delbeta) + delbeta[k] = -zero(T) + end + LAPACK.potrs!(ch2.uplo, view(ch2.factors, 1:rnk, 1:rnk), view(delbeta, 1:rnk)) + invpermute!(delbeta, ch.p) + end p end diff --git a/test/runtests.jl b/test/runtests.jl index b54d9127..bdc97937 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -142,6 +142,24 @@ end @test isapprox(coef(m2p_dep_pos_kw), coef(m2p)) end +@testset "collinearity and weights" begin + rng = StableRNG(1234321) + x1 = randn(100) + x1_2 = 3 * x1 + x2 = 10 * randn(100) + x2_2 = -2.4 * x2 + y = 1 .+ randn() * x1 + randn() * x2 + 2 * randn(100) + df = DataFrame(y = y, x1 = x1, x2 = x1_2, x3 = x2, x4 = x2_2, weights = repeat([1, 0.5],50)) + f = @formula(y ~ x1 + x2 + x3 + x4) + lm_model = lm(f, df, wts = df.weights) + X = [ones(length(y)) x1_2 x2_2] + W = Diagonal(df.weights) + coef_naive = (X'W*X)\X'W*y + @test lm_model.model.pp.chol isa CholeskyPivoted + @test rank(lm_model.model.pp.chol) == 3 + @test filter(!=(0.0), coef(lm_model)) ≈ coef_naive atol=1e-6 +end + @testset "saturated linear model" begin df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3]) model = lm(@formula(y ~ x), df)