remove ShapiroWilkCoefs

JuliaStats · Oct 23, 2023 · 284d780 · 284d780
1 parent cccb03c
commit 284d780
Show file tree

Hide file tree

Showing 2 changed files with 41 additions and 64 deletions.
diff --git a/src/shapiro_wilk.jl b/src/shapiro_wilk.jl
@@ -22,43 +22,21 @@ for (s, c) in [(:C1, [0.0, 0.221157, -0.147981, -2.07119, 4.434685, -2.706056]),
     @eval $(Symbol(:__RS92_, s))(x) = Base.Math.@horner(x, $(c...))
 end
 
-"""
-    HypothesisTests.ShapiroWilkCoefs(N::Integer)
-
-Construct a vector of de-correlated expected order statistics for Shapiro-Wilk
-test for a sample of size `N`.
-
-If multiple tests on samples of size `N` are performed, it is beneficial to
-construct and pass a single vector of coefficients to `ShapiroWilkTest`(@ref).
-"""
-struct ShapiroWilkCoefs <: AbstractVector{Float64}
-    N::Int
-    A::Vector{Float64}
-end
-
-Base.size(SWc::ShapiroWilkCoefs) = (SWc.N,)
-Base.IndexStyle(::Type{ShapiroWilkCoefs}) = IndexLinear()
-
-Base.@propagate_inbounds function Base.getindex(SWc::ShapiroWilkCoefs, i::Integer)
-    @boundscheck checkbounds(SWc, i)
-    @inbounds if checkbounds(Bool, SWc.A, i)
-        return SWc.A[i]
-    elseif isodd(length(SWc)) && i == lastindex(SWc.A) + 1
-        return zero(eltype(SWc))
-    else
-        return -SWc.A[SWc.N + 1 - i]
-    end
-end
-
-function ShapiroWilkCoefs(N::Integer)
+function shapiro_wilk_coefs(N::Integer)
     if N < 3
         throw(ArgumentError("N must be greater than or equal to 3, got $N"))
     elseif N == 3 # exact
-        return ShapiroWilkCoefs(N, [sqrt(2.0) / 2.0])
+        w = sqrt(2.0) / 2.0
+        return [w,zero(w),-w]
     else
         # Weisberg&Bingham 1975 statistic; store only positive half of m:
         # it is (anti-)symmetric; hence '2' factor below
-        m = [-quantile(Normal(), (i - 3 / 8) / (N + 1 / 4)) for i in 1:div(N, 2)]
+        n = div(N, 2)
+        m = Vector{Float64}(undef, N)
+        resize!(m, n)
+        for i in 1:n
+            m[i] = -quantile(Normal(), (i - 3 / 8) / (N + 1 / 4))
+        end
         mᵀm = 2sum(abs2, m)
         x = 1 / sqrt(N)
         a₁ = m[1] / sqrt(mᵀm) + __RS92_C1(x) # aₙ = cₙ + (...)
@@ -72,20 +50,27 @@ function ShapiroWilkCoefs(N::Integer)
             m .= m ./ sqrt(ϕ) # A, but reusing m to save allocs
             m[1], m[2] = a₁, a₂
         end
-        return ShapiroWilkCoefs(N, m)
+        resize!(m, N)
+        for i in 1:n
+            m[N-i+1] = -m[i]
+        end
+        if isodd(N)
+            m[n+1] = zero(eltype(m))
+        end
+        return m
     end
 end
 
-function unsafe_swstat(X::AbstractVector{<:Real}, A::ShapiroWilkCoefs)
-    AX = dot(view(A, 1:length(X)), X)
+function unsafe_swstat(X::AbstractVector{<:Real}, A::AbstractVector{<:Real})
+    AX = @inbounds dot(view(A, 1:length(X)), X)
     m = mean(X)
     S² = sum(x -> abs2(x - m), X)
     W = AX^2 / S²
     return min(W, one(W)) # to guard against numeric errors
 end
 
 struct ShapiroWilkTest <: HypothesisTest
-    coefs::ShapiroWilkCoefs # expectation of order statistics for Shapiro-Wilk test
+    coefs::Vector{Float64}  # expectation of order statistics for Shapiro-Wilk test
     W::Float64              # test statistic
     censored::Int           # only the smallest N₁ samples were used
 end
@@ -132,7 +117,7 @@ end
 
 """
     ShapiroWilkTest(X::AbstractVector{<:Real},
-                    swc::ShapiroWilkCoefs=ShapiroWilkCoefs(length(X));
+                    swc::AbstractVector{<:Real}=shapiro_wilk_coefs(length(X));
                     sorted::Bool=issorted(X),
                     censored::Integer=0)
 
@@ -161,7 +146,7 @@ Implements: [`pvalue`](@ref)
 # Implementation notes
 * The current implementation DOES NOT implement p-values for censored data.
 * If multiple Shapiro-Wilk tests are to be performed on samples of same
-  size, it is faster to construct `swc = ShapiroWilkCoefs(length(X))` once
+  size, it is faster to construct `swc = shapiro_wilk_coefs(length(X))` once
   and pass it to the test via `ShapiroWilkTest(X, swc)` for re-use.
 * For maximal performance sorted `X` should be passed and indicated with
   `sorted=true` keyword argument.
@@ -186,7 +171,7 @@ Normality. *Journal of the Royal Statistical Society Series C
 [doi:10.2307/2986146](https://doi.org/10.2307/2986146).
 """
 function ShapiroWilkTest(sample::AbstractVector{<:Real},
-                         swcoefs::ShapiroWilkCoefs=ShapiroWilkCoefs(length(sample));
+                         swcoefs::AbstractVector{<:Real}=shapiro_wilk_coefs(length(sample));
                          sorted::Bool=issorted(sample),
                          censored::Integer=0)
     N = length(sample)

diff --git a/test/shapiro_wilk.jl b/test/shapiro_wilk.jl
@@ -2,40 +2,32 @@ using HypothesisTests, LinearAlgebra, Test, Random
 using StableRNGs
 
 @testset "Shapiro-Wilk" begin
-    @testset "ShapiroWilkCoefs" begin
-        @test HypothesisTests.ShapiroWilkCoefs(3).A == [sqrt(2.0) / 2.0]
-        @test length(HypothesisTests.ShapiroWilkCoefs(3)) == 3
+    @testset "shapiro_wilk_coefs" begin
+        @test HypothesisTests.shapiro_wilk_coefs(3) == [sqrt(2.0) / 2.0, 0.0, -sqrt(2.0) / 2.0]
 
-        swc = HypothesisTests.ShapiroWilkCoefs(10)
-        @test length(swc) == 10
-        @test lastindex(swc) == 10
-        @test length(swc.A) == 5
+        swc = HypothesisTests.shapiro_wilk_coefs(10)
+        @test swc[4] == -swc[7]
+        @test swc[2] == -swc[9]
 
-        @test swc.A[4] == swc[4] == -swc[7]
-        @test swc.A[2] == swc[2] == -swc[9]
-
-        swc = HypothesisTests.ShapiroWilkCoefs(11)
-        @test length(swc) == 11
-        @test lastindex(swc) == 11
-        @test length(swc.A) == 5
+        swc = HypothesisTests.shapiro_wilk_coefs(11)
 
         @test swc[6] == 0.0
-        @test swc.A[5] == swc[5] == -swc[7]
-        @test swc.A[3] == swc[3] == -swc[9]
-        @test swc.A[1] == swc[1] == -swc[11]
+        @test swc[5] == -swc[7]
+        @test swc[3] == -swc[9]
+        @test swc[1] == -swc[11]
 
         #anti-symmetry
-        swc = HypothesisTests.ShapiroWilkCoefs(20)
+        swc = HypothesisTests.shapiro_wilk_coefs(20)
         @test all([swc[i] == -swc[end - i + 1] for i in eachindex(swc)])
 
         # Values obtained by calling `_swilkfort` fortran subroutine directly.
 
-        swc10 = HypothesisTests.ShapiroWilkCoefs(10)
-        res = swc10.A .- [0.573715, 0.32897, 0.214349, 0.122791, 0.0400887]
+        swc10 = HypothesisTests.shapiro_wilk_coefs(10)
+        res = swc10[1:5] .- [0.573715, 0.32897, 0.214349, 0.122791, 0.0400887]
         @test norm(res, 1) ≈ 0.0 atol = length(swc10) * eps(Float32)
 
-        swc20 = HypothesisTests.ShapiroWilkCoefs(20)
-        res = swc20.A .-
+        swc20 = HypothesisTests.shapiro_wilk_coefs(20)
+        res = swc20[1:10] .-
               [0.473371, 0.32174, 0.255663, 0.208297, 0.16864, 0.133584, 0.101474,
                0.0712893, 0.0423232, 0.0140351]
         @test norm(res, 1) ≈ 0.0 atol = length(swc20) * eps(Float32)
@@ -52,10 +44,10 @@ using StableRNGs
         # syntactic tests
 
         @test_throws ArgumentError ShapiroWilkTest([1, 2])
-        @test_throws ArgumentError("at least 3 samples are required, got 2") ShapiroWilkTest([1, 2], HypothesisTests.ShapiroWilkCoefs(3))
+        @test_throws ArgumentError("at least 3 samples are required, got 2") ShapiroWilkTest([1, 2], HypothesisTests.shapiro_wilk_coefs(3))
         @test_throws ArgumentError ShapiroWilkTest([1, 2, 3], censored=4)
         @test_throws DimensionMismatch ShapiroWilkTest([1, 2, 3],
-                                                       HypothesisTests.ShapiroWilkCoefs(4))
+                                                       HypothesisTests.shapiro_wilk_coefs(4))
 
         @test_throws ArgumentError("sample doesn't seem to be sorted or is constant (up to numerical accuracy)") ShapiroWilkTest([1,1,1])
         @test_throws ArgumentError("sample is constant (up to numerical accuracy)") ShapiroWilkTest([1,1,1], sorted=false)
@@ -110,8 +102,8 @@ using StableRNGs
             # *Statistics and Computing* (1992) **2**, 117-119
 
             X = [48.4, 49.0, 59.5, 59.6, 60.7, 88.8, 98.2, 109.4, 169.1, 227.1]
-            swc = HypothesisTests.ShapiroWilkCoefs(length(X))
-            @test norm(swc.A .- [0.5737, 0.3290, 0.2143, 0.1228, 0.0401], Inf) < 5.0e-5
+            swc = HypothesisTests.shapiro_wilk_coefs(length(X))
+            @test norm(swc[1:5] .- [0.5737, 0.3290, 0.2143, 0.1228, 0.0401], Inf) < 5.0e-5
             W = HypothesisTests.unsafe_swstat(X, swc)
             @test W ≈ 0.8078 atol = 2.9e-5