From 8d462d5b9ffeddbec1a3ea6f965ec075e273e1c1 Mon Sep 17 00:00:00 2001 From: Wonseok Shin Date: Tue, 26 Apr 2022 21:23:33 -0400 Subject: [PATCH 1/4] Calculate mean more accurately inside mean_and_{var,std}() --- src/moments.jl | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/moments.jl b/src/moments.jl index 5b0a66147..206edc885 100644 --- a/src/moments.jl +++ b/src/moments.jl @@ -1,3 +1,8 @@ +require_accu_mean(x::AbstractArray) = length(x)≥4 && x[1]==x[2]==x[end-1]==x[end] + +calc_∆m(x::AbstractArray, m::Number) = sum(xᵢ->xᵢ-m, x) / length(x) +calc_∆m(x::AbstractArray, w::AbstractWeights, m::Number) = mapreduce((xᵢ,wᵢ)->(xᵢ-m)*wᵢ, +, x, w) / sum(w) + ##### Weighted var & std ## var @@ -100,6 +105,7 @@ See [`var`](@ref) documentation for more details. """ function mean_and_var(x; corrected::Bool=true) m = mean(x) + require_accu_mean(x) && (m += calc_∆m(x, m)) v = var(x, mean=m, corrected=corrected) m, v end @@ -116,40 +122,44 @@ See [`std`](@ref) documentation for more details. """ function mean_and_std(x; corrected::Bool=true) m = mean(x) + require_accu_mean(x) && (m += calc_∆m(x, m)) s = std(x, mean=m, corrected=corrected) m, s end function mean_and_var(x::RealArray, w::AbstractWeights; corrected::DepBool=nothing) m = mean(x, w) + require_accu_mean(x) && (m += calc_∆m(x, m)) v = var(x, w, mean=m, corrected=depcheck(:mean_and_var, :corrected, corrected)) m, v end + function mean_and_std(x::RealArray, w::AbstractWeights; corrected::DepBool=nothing) m = mean(x, w) + require_accu_mean(x) && (m += calc_∆m(x, w, m)) s = std(x, w, mean=m, corrected=depcheck(:mean_and_std, :corrected, corrected)) m, s end - function mean_and_var(x::RealArray, dim::Int; corrected::Bool=true) m = mean(x, dims=dim) v = var(x, dims=dim, mean=m, corrected=corrected) m, v end + function mean_and_std(x::RealArray, dim::Int; corrected::Bool=true) m = mean(x, dims=dim) s = std(x, dims=dim, mean=m, corrected=corrected) m, s end - function mean_and_var(x::RealArray, w::AbstractWeights, dims::Int; corrected::DepBool=nothing) m = mean(x, w, dims=dims) v = var(x, w, dims, mean=m, corrected=depcheck(:mean_and_var, :corrected, corrected)) m, v end + function mean_and_std(x::RealArray, w::AbstractWeights, dims::Int; corrected::DepBool=nothing) m = mean(x, w, dims=dims) From 300a8fb0df4020418677c606c226e1cd0d20433c Mon Sep 17 00:00:00 2001 From: Wonseok Shin Date: Thu, 28 Apr 2022 20:04:31 -0400 Subject: [PATCH 2/4] Calculate mean more accurately inside mean_and_{var,std}() taking dims --- src/moments.jl | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/moments.jl b/src/moments.jl index 206edc885..3aca0b459 100644 --- a/src/moments.jl +++ b/src/moments.jl @@ -143,12 +143,34 @@ end function mean_and_var(x::RealArray, dim::Int; corrected::Bool=true) m = mean(x, dims=dim) + + sz = size(x) + K = length(sz) + N = sz[dim] + + CI = CartesianIndices(ntuple(k->(k==dim ? (0:0) : (1:sz[k])), Val(K))) + ∆ci = CartesianIndex(ntuple(k->(k==dim ? 1 : 0), Val(K))) + for ci = CI + require_accu_mean(x, ci, ∆ci, N) && (m[ci+∆ci] += calc_∆m(x, m, ci, ∆ci, N)) + end + v = var(x, dims=dim, mean=m, corrected=corrected) m, v end function mean_and_std(x::RealArray, dim::Int; corrected::Bool=true) m = mean(x, dims=dim) + + sz = size(x) + K = length(sz) + N = sz[dim] + + CI = CartesianIndices(ntuple(k->(k==dim ? (0:0) : (1:sz[k])), Val(K))) + ∆ci = CartesianIndex(ntuple(k->(k==dim ? 1 : 0), Val(K))) + for ci = CI + require_accu_mean(x, ci, ∆ci, N) && (m[ci+∆ci] += calc_∆m(x, m, ci, ∆ci, N)) + end + s = std(x, dims=dim, mean=m, corrected=corrected) m, s end @@ -156,6 +178,17 @@ end function mean_and_var(x::RealArray, w::AbstractWeights, dims::Int; corrected::DepBool=nothing) m = mean(x, w, dims=dims) + + sz = size(x) + K = length(sz) + N = sz[dims] + + CI = CartesianIndices(ntuple(k->(k==dims ? (0:0) : (1:sz[k])), Val(K))) + ∆ci = CartesianIndex(ntuple(k->(k==dims ? 1 : 0), Val(K))) + for ci = CI + require_accu_mean(x, ci, ∆ci, N) && (m[ci+∆ci] += calc_∆m(x, w, m, ci, ∆ci, N)) + end + v = var(x, w, dims, mean=m, corrected=depcheck(:mean_and_var, :corrected, corrected)) m, v end @@ -163,6 +196,17 @@ end function mean_and_std(x::RealArray, w::AbstractWeights, dims::Int; corrected::DepBool=nothing) m = mean(x, w, dims=dims) + + sz = size(x) + K = length(sz) + N = sz[dims] + + CI = CartesianIndices(ntuple(k->(k==dims ? (0:0) : (1:sz[k])), Val(K))) + ∆ci = CartesianIndex(ntuple(k->(k==dims ? 1 : 0), Val(K))) + for ci = CI + require_accu_mean(x, ci, ∆ci, N) && (m[ci+∆ci] += calc_∆m(x, w, m, ci, ∆ci, N)) + end + s = std(x, w, dims, mean=m, corrected=depcheck(:mean_and_std, :corrected, corrected)) m, s end From a538f47b21521c3f5b37d489e6cb56eb2c169cd4 Mon Sep 17 00:00:00 2001 From: Wonseok Shin Date: Thu, 28 Apr 2022 20:05:31 -0400 Subject: [PATCH 3/4] =?UTF-8?q?Support=20iterator=20type=20x=20in=20requir?= =?UTF-8?q?e=5Faccu=5Fmean()=20and=20calc=5F=E2=88=86m()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/moments.jl | 73 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 2 deletions(-) diff --git a/src/moments.jl b/src/moments.jl index 3aca0b459..11910bd20 100644 --- a/src/moments.jl +++ b/src/moments.jl @@ -1,7 +1,76 @@ require_accu_mean(x::AbstractArray) = length(x)≥4 && x[1]==x[2]==x[end-1]==x[end] +require_accu_mean(x::AbstractArray, + ci::CartesianIndex{K}, # location of vector to examine + ∆ci::CartesianIndex{K}, # (0, ..., 0, 1, 0, ..., 0); CartesianIndex(ntuple(k->k==dim, Val(K))) + N::Int # size(x, dim) + ) where {K} = + N≥4 && x[ci+∆ci] == x[ci+2∆ci] == x[ci+(N-1)∆ci] == x[ci+N*∆ci] + +# For non-array x (e.g., iterator) +function require_accu_mean(x) + vs₁ = iterate(x) + vs₁ === nothing && return false + + val₁, st = vs₁ + count = 1 + while (count+=1) ≤ 4 + vs = iterate(x, st) + vs === nothing && return false # do not require accurate mean Inf length(x) < 4 + val, st = vs + val != val₁ && return false + end + + return true +end + +function calc_∆m(x, m::Number) + ∆m = zero(m) + + vs = iterate(x) + count = 0 + while vs !== nothing + count += 1 + val, st = vs + ∆m += val - m + vs = iterate(x, st) + end + ∆m /= count + + return ∆m +end + +calc_∆m(x, w::AbstractWeights, m::Number) = mapreduce((xᵢ,wᵢ)->(xᵢ-m)*wᵢ, +, x, w) / sum(w) +function calc_∆m(x::AbstractArray, + m::Number, # mean calculated with rounding errors + ci::CartesianIndex{K}, # location of vector to examine + ∆ci::CartesianIndex{K}, # (0, ..., 0, 1, 0, ..., 0); CartesianIndex(ntuple(k->k==dim, Val(K))) + N::Int # size(x, dim) + ) where {K} + ∆m = zero(m) + for j = 1:N + ∆m += x[ci+j*∆ci] + end + ∆m /= N + + return ∆m +end + +function calc_∆m(x::AbstractArray, + w::AbstractWeights, + m::Number, # mean calculated with rounding errors + ci::CartesianIndex{K}, # location of vector to examine + ∆ci::CartesianIndex{K}, # (0, ..., 0, 1, 0, ..., 0); CartesianIndex(ntuple(k->k==dim, Val(K))) + N::Int # size(x, dim) + ) where {K} + ∆m = zero(m) + for j = 1:N + ∆m += w[j] * x[ci+j*∆ci] + end + ∆m /= sum(w) + + return ∆m +end -calc_∆m(x::AbstractArray, m::Number) = sum(xᵢ->xᵢ-m, x) / length(x) -calc_∆m(x::AbstractArray, w::AbstractWeights, m::Number) = mapreduce((xᵢ,wᵢ)->(xᵢ-m)*wᵢ, +, x, w) / sum(w) ##### Weighted var & std From 27e429903451553d561036cdc1e4abc5ce95589a Mon Sep 17 00:00:00 2001 From: Wonseok Shin Date: Fri, 29 Apr 2022 19:53:11 -0400 Subject: [PATCH 4/4] Add test for improved zscore() --- test/scalarstats.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/scalarstats.jl b/test/scalarstats.jl index 48c64ff0f..b6db3040a 100644 --- a/test/scalarstats.jl +++ b/test/scalarstats.jl @@ -86,6 +86,7 @@ z2 = [8. 2. 3. 1.; 24. 10. -1. -1.; 20. 12. 1. -2.] @test zscore(a, 1) ≈ zscore(a, mean(a, dims=1), std(a, dims=1)) @test zscore(a, 2) ≈ zscore(a, mean(a, dims=2), std(a, dims=2)) +@test all(isnan, zscore(fill(log(1e-5), 8))) # Issue #196 ###### quantile & friends