Skip to content

Commit

Permalink
remove ShapiroWilkCoefs
Browse files Browse the repository at this point in the history
  • Loading branch information
kalmarek committed Oct 23, 2023
1 parent cccb03c commit 284d780
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 64 deletions.
61 changes: 23 additions & 38 deletions src/shapiro_wilk.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,43 +22,21 @@ for (s, c) in [(:C1, [0.0, 0.221157, -0.147981, -2.07119, 4.434685, -2.706056]),
@eval $(Symbol(:__RS92_, s))(x) = Base.Math.@horner(x, $(c...))
end

"""
HypothesisTests.ShapiroWilkCoefs(N::Integer)
Construct a vector of de-correlated expected order statistics for Shapiro-Wilk
test for a sample of size `N`.
If multiple tests on samples of size `N` are performed, it is beneficial to
construct and pass a single vector of coefficients to `ShapiroWilkTest`(@ref).
"""
struct ShapiroWilkCoefs <: AbstractVector{Float64}
N::Int
A::Vector{Float64}
end

Base.size(SWc::ShapiroWilkCoefs) = (SWc.N,)
Base.IndexStyle(::Type{ShapiroWilkCoefs}) = IndexLinear()

Base.@propagate_inbounds function Base.getindex(SWc::ShapiroWilkCoefs, i::Integer)
@boundscheck checkbounds(SWc, i)
@inbounds if checkbounds(Bool, SWc.A, i)
return SWc.A[i]
elseif isodd(length(SWc)) && i == lastindex(SWc.A) + 1
return zero(eltype(SWc))
else
return -SWc.A[SWc.N + 1 - i]
end
end

function ShapiroWilkCoefs(N::Integer)
function shapiro_wilk_coefs(N::Integer)
if N < 3
throw(ArgumentError("N must be greater than or equal to 3, got $N"))
elseif N == 3 # exact
return ShapiroWilkCoefs(N, [sqrt(2.0) / 2.0])
w = sqrt(2.0) / 2.0
return [w,zero(w),-w]
else
# Weisberg&Bingham 1975 statistic; store only positive half of m:
# it is (anti-)symmetric; hence '2' factor below
m = [-quantile(Normal(), (i - 3 / 8) / (N + 1 / 4)) for i in 1:div(N, 2)]
n = div(N, 2)
m = Vector{Float64}(undef, N)
resize!(m, n)
for i in 1:n
m[i] = -quantile(Normal(), (i - 3 / 8) / (N + 1 / 4))
end
mᵀm = 2sum(abs2, m)
x = 1 / sqrt(N)
a₁ = m[1] / sqrt(mᵀm) + __RS92_C1(x) # aₙ = cₙ + (...)
Expand All @@ -72,20 +50,27 @@ function ShapiroWilkCoefs(N::Integer)
m .= m ./ sqrt(ϕ) # A, but reusing m to save allocs
m[1], m[2] = a₁, a₂
end
return ShapiroWilkCoefs(N, m)
resize!(m, N)
for i in 1:n
m[N-i+1] = -m[i]
end
if isodd(N)
m[n+1] = zero(eltype(m))
end
return m
end
end

function unsafe_swstat(X::AbstractVector{<:Real}, A::ShapiroWilkCoefs)
AX = dot(view(A, 1:length(X)), X)
function unsafe_swstat(X::AbstractVector{<:Real}, A::AbstractVector{<:Real})
AX = @inbounds dot(view(A, 1:length(X)), X)
m = mean(X)
= sum(x -> abs2(x - m), X)
W = AX^2 /
return min(W, one(W)) # to guard against numeric errors
end

struct ShapiroWilkTest <: HypothesisTest
coefs::ShapiroWilkCoefs # expectation of order statistics for Shapiro-Wilk test
coefs::Vector{Float64} # expectation of order statistics for Shapiro-Wilk test
W::Float64 # test statistic
censored::Int # only the smallest N₁ samples were used
end
Expand Down Expand Up @@ -132,7 +117,7 @@ end

"""
ShapiroWilkTest(X::AbstractVector{<:Real},
swc::ShapiroWilkCoefs=ShapiroWilkCoefs(length(X));
swc::AbstractVector{<:Real}=shapiro_wilk_coefs(length(X));
sorted::Bool=issorted(X),
censored::Integer=0)
Expand Down Expand Up @@ -161,7 +146,7 @@ Implements: [`pvalue`](@ref)
# Implementation notes
* The current implementation DOES NOT implement p-values for censored data.
* If multiple Shapiro-Wilk tests are to be performed on samples of same
size, it is faster to construct `swc = ShapiroWilkCoefs(length(X))` once
size, it is faster to construct `swc = shapiro_wilk_coefs(length(X))` once
and pass it to the test via `ShapiroWilkTest(X, swc)` for re-use.
* For maximal performance sorted `X` should be passed and indicated with
`sorted=true` keyword argument.
Expand All @@ -186,7 +171,7 @@ Normality. *Journal of the Royal Statistical Society Series C
[doi:10.2307/2986146](https://doi.org/10.2307/2986146).
"""
function ShapiroWilkTest(sample::AbstractVector{<:Real},
swcoefs::ShapiroWilkCoefs=ShapiroWilkCoefs(length(sample));
swcoefs::AbstractVector{<:Real}=shapiro_wilk_coefs(length(sample));
sorted::Bool=issorted(sample),
censored::Integer=0)
N = length(sample)
Expand Down
44 changes: 18 additions & 26 deletions test/shapiro_wilk.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,40 +2,32 @@ using HypothesisTests, LinearAlgebra, Test, Random
using StableRNGs

@testset "Shapiro-Wilk" begin
@testset "ShapiroWilkCoefs" begin
@test HypothesisTests.ShapiroWilkCoefs(3).A == [sqrt(2.0) / 2.0]
@test length(HypothesisTests.ShapiroWilkCoefs(3)) == 3
@testset "shapiro_wilk_coefs" begin
@test HypothesisTests.shapiro_wilk_coefs(3) == [sqrt(2.0) / 2.0, 0.0, -sqrt(2.0) / 2.0]

swc = HypothesisTests.ShapiroWilkCoefs(10)
@test length(swc) == 10
@test lastindex(swc) == 10
@test length(swc.A) == 5
swc = HypothesisTests.shapiro_wilk_coefs(10)
@test swc[4] == -swc[7]
@test swc[2] == -swc[9]

@test swc.A[4] == swc[4] == -swc[7]
@test swc.A[2] == swc[2] == -swc[9]

swc = HypothesisTests.ShapiroWilkCoefs(11)
@test length(swc) == 11
@test lastindex(swc) == 11
@test length(swc.A) == 5
swc = HypothesisTests.shapiro_wilk_coefs(11)

@test swc[6] == 0.0
@test swc.A[5] == swc[5] == -swc[7]
@test swc.A[3] == swc[3] == -swc[9]
@test swc.A[1] == swc[1] == -swc[11]
@test swc[5] == -swc[7]
@test swc[3] == -swc[9]
@test swc[1] == -swc[11]

#anti-symmetry
swc = HypothesisTests.ShapiroWilkCoefs(20)
swc = HypothesisTests.shapiro_wilk_coefs(20)
@test all([swc[i] == -swc[end - i + 1] for i in eachindex(swc)])

# Values obtained by calling `_swilkfort` fortran subroutine directly.

swc10 = HypothesisTests.ShapiroWilkCoefs(10)
res = swc10.A .- [0.573715, 0.32897, 0.214349, 0.122791, 0.0400887]
swc10 = HypothesisTests.shapiro_wilk_coefs(10)
res = swc10[1:5] .- [0.573715, 0.32897, 0.214349, 0.122791, 0.0400887]
@test norm(res, 1) 0.0 atol = length(swc10) * eps(Float32)

swc20 = HypothesisTests.ShapiroWilkCoefs(20)
res = swc20.A .-
swc20 = HypothesisTests.shapiro_wilk_coefs(20)
res = swc20[1:10] .-
[0.473371, 0.32174, 0.255663, 0.208297, 0.16864, 0.133584, 0.101474,
0.0712893, 0.0423232, 0.0140351]
@test norm(res, 1) 0.0 atol = length(swc20) * eps(Float32)
Expand All @@ -52,10 +44,10 @@ using StableRNGs
# syntactic tests

@test_throws ArgumentError ShapiroWilkTest([1, 2])
@test_throws ArgumentError("at least 3 samples are required, got 2") ShapiroWilkTest([1, 2], HypothesisTests.ShapiroWilkCoefs(3))
@test_throws ArgumentError("at least 3 samples are required, got 2") ShapiroWilkTest([1, 2], HypothesisTests.shapiro_wilk_coefs(3))
@test_throws ArgumentError ShapiroWilkTest([1, 2, 3], censored=4)
@test_throws DimensionMismatch ShapiroWilkTest([1, 2, 3],
HypothesisTests.ShapiroWilkCoefs(4))
HypothesisTests.shapiro_wilk_coefs(4))

@test_throws ArgumentError("sample doesn't seem to be sorted or is constant (up to numerical accuracy)") ShapiroWilkTest([1,1,1])
@test_throws ArgumentError("sample is constant (up to numerical accuracy)") ShapiroWilkTest([1,1,1], sorted=false)
Expand Down Expand Up @@ -110,8 +102,8 @@ using StableRNGs
# *Statistics and Computing* (1992) **2**, 117-119

X = [48.4, 49.0, 59.5, 59.6, 60.7, 88.8, 98.2, 109.4, 169.1, 227.1]
swc = HypothesisTests.ShapiroWilkCoefs(length(X))
@test norm(swc.A .- [0.5737, 0.3290, 0.2143, 0.1228, 0.0401], Inf) < 5.0e-5
swc = HypothesisTests.shapiro_wilk_coefs(length(X))
@test norm(swc[1:5] .- [0.5737, 0.3290, 0.2143, 0.1228, 0.0401], Inf) < 5.0e-5
W = HypothesisTests.unsafe_swstat(X, swc)
@test W 0.8078 atol = 2.9e-5

Expand Down

0 comments on commit 284d780

Please sign in to comment.