From c66fc41a29e2d19989d1441d6606c5ccdbc7e6be Mon Sep 17 00:00:00 2001 From: "C. Brenhin Keller" Date: Tue, 12 Sep 2023 14:41:35 -0400 Subject: [PATCH] Fall back to base `partialsort!` for very large arrays --- Project.toml | 2 +- src/Sorting/quicksort.jl | 78 +++------------------------------------- test/testSorting.jl | 18 ---------- 3 files changed, 6 insertions(+), 92 deletions(-) diff --git a/Project.toml b/Project.toml index dbc3866..7fd991f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "NaNStatistics" uuid = "b946abbf-3ea7-4610-9019-9858bfdeaf2d" authors = ["C. Brenhin Keller"] -version = "0.6.30" +version = "0.6.31" [deps] IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" diff --git a/src/Sorting/quicksort.jl b/src/Sorting/quicksort.jl index f8689c4..f098d16 100644 --- a/src/Sorting/quicksort.jl +++ b/src/Sorting/quicksort.jl @@ -75,6 +75,11 @@ end # Partially sort `A` around the `k`th sorted element and return that element function quickselect!(A::AbstractArray, iₗ=firstindex(A), iᵤ=lastindex(A), k=(iₗ+iᵤ)÷2) + # Fall back to Base implementation for very large arrays + if iᵤ-iₗ > 20000 + return Base.Sort.partialsort!(view(A, iₗ:iᵤ), k-(iₗ-1)) + end + # Pick a pivot for partitioning N = iᵤ - iₗ + 1 A[iₗ], A[k] = A[k], A[iₗ] @@ -181,76 +186,3 @@ function quicksort!(A, iₗ=firstindex(A), iᵤ=lastindex(A)) quicksort!(A, iₚ+1, iᵤ) end end - -# Sort `A`, assuming no NaNs, multithreaded -function quicksortt!(A, iₗ=firstindex(A), iᵤ=lastindex(A), level=1) - if issortedrange(A, iₗ, iᵤ) - # If already sorted, we're done here - return A - end - # Otherwise, we have to sort - N = iᵤ - iₗ + 1 - if isantisortedrange(A, iₗ, iᵤ) - vreverse!(A, iₗ, iᵤ) - return A - elseif N == 3 - # We know we are neither sorted nor antisorted, so only four possibilities remain - iₘ = iₗ + 1 - a,b,c = A[iₗ], A[iₘ], A[iᵤ] - if a <= b - if a <= c - A[iₘ], A[iᵤ] = c, b # a ≤ c ≤ b - else - A[iₗ], A[iₘ], A[iᵤ] = c, a, b # c ≤ a ≤ b - end - else - if a <= c - A[iₗ], A[iₘ] = b, a # b ≤ a ≤ c - else - A[iₗ], A[iₘ], A[iᵤ] = b, c, a # b ≤ c ≤ a - end - end - return A - else - # Pick a pivot for partitioning - iₚ = iₗ + (N >> 2) - A[iₗ], A[iₚ] = A[iₚ], A[iₗ] - pivot = A[iₗ] - - # Count up elements that must be moved to upper partition - Nᵤ = 0 - @turbo for i = (iₗ+1):iᵤ - Nᵤ += A[i] >= pivot - end - Nₗ = N - Nᵤ - - # Swap elements between upper and lower partitions - i = iₗ - j = iᵤ - @inbounds for n = 1:Nₗ-1 - i = iₗ + n - if A[i] >= pivot - while A[j] >= pivot - j -= 1 - end - j <= i && break - A[i], A[j] = A[j], A[i] - j -= 1 - end - end - # Move pivot to the top of the lower partition - iₚ = iₗ + Nₗ - 1 - A[iₗ], A[iₚ] = A[iₚ], A[iₗ] - # Recurse: sort both upper and lower partitions - if level < 7 - @sync begin - Threads.@spawn quicksortt!(A, iₗ, iₚ, level+1) - Threads.@spawn quicksortt!(A, iₚ+1, iᵤ, level+1) - end - else - quicksort!(A, iₗ, iₚ) - quicksort!(A, iₚ+1, iᵤ) - end - return A - end -end diff --git a/test/testSorting.jl b/test/testSorting.jl index bd0e310..f879dac 100644 --- a/test/testSorting.jl +++ b/test/testSorting.jl @@ -22,22 +22,6 @@ NaNStatistics.quicksort!(A) @test A == B - # Multithreaded quicksort - A = rand(100) - B = sort(A) - NaNStatistics.quicksortt!(A) - @test A == B - - A = rand(1_000) - B = sort(A) - NaNStatistics.quicksortt!(A) - @test A == B - - A = rand(1_000_000) - B = sort(A) - NaNStatistics.quicksortt!(A) - @test A == B - # Partialsort A = rand(101) m = median(A) @@ -52,8 +36,6 @@ # Quicksort of already-sorted arrays @test NaNStatistics.quicksort!(collect(1:100)) == 1:100 @test NaNStatistics.quicksort!(collect(100:-1:1)) == 1:100 - @test NaNStatistics.quicksortt!(collect(1:100)) == 1:100 - @test NaNStatistics.quicksortt!(collect(100:-1:1)) == 1:100 # # Vsort, Float64 # A = rand(100)