From 284d7806b1ada00f60bfc739bdef3ab01194ea2e Mon Sep 17 00:00:00 2001 From: Marek Kaluba Date: Mon, 23 Oct 2023 09:54:48 +0200 Subject: [PATCH] remove ShapiroWilkCoefs --- src/shapiro_wilk.jl | 61 +++++++++++++++++--------------------------- test/shapiro_wilk.jl | 44 +++++++++++++------------------- 2 files changed, 41 insertions(+), 64 deletions(-) diff --git a/src/shapiro_wilk.jl b/src/shapiro_wilk.jl index f15b07c2..a8bddaba 100644 --- a/src/shapiro_wilk.jl +++ b/src/shapiro_wilk.jl @@ -22,43 +22,21 @@ for (s, c) in [(:C1, [0.0, 0.221157, -0.147981, -2.07119, 4.434685, -2.706056]), @eval $(Symbol(:__RS92_, s))(x) = Base.Math.@horner(x, $(c...)) end -""" - HypothesisTests.ShapiroWilkCoefs(N::Integer) - -Construct a vector of de-correlated expected order statistics for Shapiro-Wilk -test for a sample of size `N`. - -If multiple tests on samples of size `N` are performed, it is beneficial to -construct and pass a single vector of coefficients to `ShapiroWilkTest`(@ref). -""" -struct ShapiroWilkCoefs <: AbstractVector{Float64} - N::Int - A::Vector{Float64} -end - -Base.size(SWc::ShapiroWilkCoefs) = (SWc.N,) -Base.IndexStyle(::Type{ShapiroWilkCoefs}) = IndexLinear() - -Base.@propagate_inbounds function Base.getindex(SWc::ShapiroWilkCoefs, i::Integer) - @boundscheck checkbounds(SWc, i) - @inbounds if checkbounds(Bool, SWc.A, i) - return SWc.A[i] - elseif isodd(length(SWc)) && i == lastindex(SWc.A) + 1 - return zero(eltype(SWc)) - else - return -SWc.A[SWc.N + 1 - i] - end -end - -function ShapiroWilkCoefs(N::Integer) +function shapiro_wilk_coefs(N::Integer) if N < 3 throw(ArgumentError("N must be greater than or equal to 3, got $N")) elseif N == 3 # exact - return ShapiroWilkCoefs(N, [sqrt(2.0) / 2.0]) + w = sqrt(2.0) / 2.0 + return [w,zero(w),-w] else # Weisberg&Bingham 1975 statistic; store only positive half of m: # it is (anti-)symmetric; hence '2' factor below - m = [-quantile(Normal(), (i - 3 / 8) / (N + 1 / 4)) for i in 1:div(N, 2)] + n = div(N, 2) + m = Vector{Float64}(undef, N) + resize!(m, n) + for i in 1:n + m[i] = -quantile(Normal(), (i - 3 / 8) / (N + 1 / 4)) + end mᵀm = 2sum(abs2, m) x = 1 / sqrt(N) a₁ = m[1] / sqrt(mᵀm) + __RS92_C1(x) # aₙ = cₙ + (...) @@ -72,12 +50,19 @@ function ShapiroWilkCoefs(N::Integer) m .= m ./ sqrt(ϕ) # A, but reusing m to save allocs m[1], m[2] = a₁, a₂ end - return ShapiroWilkCoefs(N, m) + resize!(m, N) + for i in 1:n + m[N-i+1] = -m[i] + end + if isodd(N) + m[n+1] = zero(eltype(m)) + end + return m end end -function unsafe_swstat(X::AbstractVector{<:Real}, A::ShapiroWilkCoefs) - AX = dot(view(A, 1:length(X)), X) +function unsafe_swstat(X::AbstractVector{<:Real}, A::AbstractVector{<:Real}) + AX = @inbounds dot(view(A, 1:length(X)), X) m = mean(X) S² = sum(x -> abs2(x - m), X) W = AX^2 / S² @@ -85,7 +70,7 @@ function unsafe_swstat(X::AbstractVector{<:Real}, A::ShapiroWilkCoefs) end struct ShapiroWilkTest <: HypothesisTest - coefs::ShapiroWilkCoefs # expectation of order statistics for Shapiro-Wilk test + coefs::Vector{Float64} # expectation of order statistics for Shapiro-Wilk test W::Float64 # test statistic censored::Int # only the smallest N₁ samples were used end @@ -132,7 +117,7 @@ end """ ShapiroWilkTest(X::AbstractVector{<:Real}, - swc::ShapiroWilkCoefs=ShapiroWilkCoefs(length(X)); + swc::AbstractVector{<:Real}=shapiro_wilk_coefs(length(X)); sorted::Bool=issorted(X), censored::Integer=0) @@ -161,7 +146,7 @@ Implements: [`pvalue`](@ref) # Implementation notes * The current implementation DOES NOT implement p-values for censored data. * If multiple Shapiro-Wilk tests are to be performed on samples of same - size, it is faster to construct `swc = ShapiroWilkCoefs(length(X))` once + size, it is faster to construct `swc = shapiro_wilk_coefs(length(X))` once and pass it to the test via `ShapiroWilkTest(X, swc)` for re-use. * For maximal performance sorted `X` should be passed and indicated with `sorted=true` keyword argument. @@ -186,7 +171,7 @@ Normality. *Journal of the Royal Statistical Society Series C [doi:10.2307/2986146](https://doi.org/10.2307/2986146). """ function ShapiroWilkTest(sample::AbstractVector{<:Real}, - swcoefs::ShapiroWilkCoefs=ShapiroWilkCoefs(length(sample)); + swcoefs::AbstractVector{<:Real}=shapiro_wilk_coefs(length(sample)); sorted::Bool=issorted(sample), censored::Integer=0) N = length(sample) diff --git a/test/shapiro_wilk.jl b/test/shapiro_wilk.jl index a3ba4526..702dfca4 100644 --- a/test/shapiro_wilk.jl +++ b/test/shapiro_wilk.jl @@ -2,40 +2,32 @@ using HypothesisTests, LinearAlgebra, Test, Random using StableRNGs @testset "Shapiro-Wilk" begin - @testset "ShapiroWilkCoefs" begin - @test HypothesisTests.ShapiroWilkCoefs(3).A == [sqrt(2.0) / 2.0] - @test length(HypothesisTests.ShapiroWilkCoefs(3)) == 3 + @testset "shapiro_wilk_coefs" begin + @test HypothesisTests.shapiro_wilk_coefs(3) == [sqrt(2.0) / 2.0, 0.0, -sqrt(2.0) / 2.0] - swc = HypothesisTests.ShapiroWilkCoefs(10) - @test length(swc) == 10 - @test lastindex(swc) == 10 - @test length(swc.A) == 5 + swc = HypothesisTests.shapiro_wilk_coefs(10) + @test swc[4] == -swc[7] + @test swc[2] == -swc[9] - @test swc.A[4] == swc[4] == -swc[7] - @test swc.A[2] == swc[2] == -swc[9] - - swc = HypothesisTests.ShapiroWilkCoefs(11) - @test length(swc) == 11 - @test lastindex(swc) == 11 - @test length(swc.A) == 5 + swc = HypothesisTests.shapiro_wilk_coefs(11) @test swc[6] == 0.0 - @test swc.A[5] == swc[5] == -swc[7] - @test swc.A[3] == swc[3] == -swc[9] - @test swc.A[1] == swc[1] == -swc[11] + @test swc[5] == -swc[7] + @test swc[3] == -swc[9] + @test swc[1] == -swc[11] #anti-symmetry - swc = HypothesisTests.ShapiroWilkCoefs(20) + swc = HypothesisTests.shapiro_wilk_coefs(20) @test all([swc[i] == -swc[end - i + 1] for i in eachindex(swc)]) # Values obtained by calling `_swilkfort` fortran subroutine directly. - swc10 = HypothesisTests.ShapiroWilkCoefs(10) - res = swc10.A .- [0.573715, 0.32897, 0.214349, 0.122791, 0.0400887] + swc10 = HypothesisTests.shapiro_wilk_coefs(10) + res = swc10[1:5] .- [0.573715, 0.32897, 0.214349, 0.122791, 0.0400887] @test norm(res, 1) ≈ 0.0 atol = length(swc10) * eps(Float32) - swc20 = HypothesisTests.ShapiroWilkCoefs(20) - res = swc20.A .- + swc20 = HypothesisTests.shapiro_wilk_coefs(20) + res = swc20[1:10] .- [0.473371, 0.32174, 0.255663, 0.208297, 0.16864, 0.133584, 0.101474, 0.0712893, 0.0423232, 0.0140351] @test norm(res, 1) ≈ 0.0 atol = length(swc20) * eps(Float32) @@ -52,10 +44,10 @@ using StableRNGs # syntactic tests @test_throws ArgumentError ShapiroWilkTest([1, 2]) - @test_throws ArgumentError("at least 3 samples are required, got 2") ShapiroWilkTest([1, 2], HypothesisTests.ShapiroWilkCoefs(3)) + @test_throws ArgumentError("at least 3 samples are required, got 2") ShapiroWilkTest([1, 2], HypothesisTests.shapiro_wilk_coefs(3)) @test_throws ArgumentError ShapiroWilkTest([1, 2, 3], censored=4) @test_throws DimensionMismatch ShapiroWilkTest([1, 2, 3], - HypothesisTests.ShapiroWilkCoefs(4)) + HypothesisTests.shapiro_wilk_coefs(4)) @test_throws ArgumentError("sample doesn't seem to be sorted or is constant (up to numerical accuracy)") ShapiroWilkTest([1,1,1]) @test_throws ArgumentError("sample is constant (up to numerical accuracy)") ShapiroWilkTest([1,1,1], sorted=false) @@ -110,8 +102,8 @@ using StableRNGs # *Statistics and Computing* (1992) **2**, 117-119 X = [48.4, 49.0, 59.5, 59.6, 60.7, 88.8, 98.2, 109.4, 169.1, 227.1] - swc = HypothesisTests.ShapiroWilkCoefs(length(X)) - @test norm(swc.A .- [0.5737, 0.3290, 0.2143, 0.1228, 0.0401], Inf) < 5.0e-5 + swc = HypothesisTests.shapiro_wilk_coefs(length(X)) + @test norm(swc[1:5] .- [0.5737, 0.3290, 0.2143, 0.1228, 0.0401], Inf) < 5.0e-5 W = HypothesisTests.unsafe_swstat(X, swc) @test W ≈ 0.8078 atol = 2.9e-5