From a1a2d1fa4e160b2dd119d592531983f2c0b06f93 Mon Sep 17 00:00:00 2001 From: mertcandav Date: Mon, 2 Sep 2024 18:19:35 +0300 Subject: [PATCH] std::slices: add the SortFunc function --- std/slices/slices.jule | 21 --- std/slices/sort.jule | 359 ++++------------------------------- std/slices/sort_test.jule | 41 ++++ std/slices/sortfunc.jule | 357 +++++++++++++++++++++++++++++++++++ std/slices/sortordered.jule | 365 ++++++++++++++++++++++++++++++++++++ 5 files changed, 795 insertions(+), 348 deletions(-) create mode 100644 std/slices/sortfunc.jule create mode 100644 std/slices/sortordered.jule diff --git a/std/slices/slices.jule b/std/slices/slices.jule index fa16f0824..f4a245356 100644 --- a/std/slices/slices.jule +++ b/std/slices/slices.jule @@ -2,27 +2,6 @@ // Use of this source code is governed by a BSD 3-Clause // license that can be found in the LICENSE file. -use bits for std::math::bits -use cmp for std::internal::cmp - -// Sorts a slice of any ordered type in ascending order. -// When sorting floating-point numbers, NaNs are ordered before other values. -fn Sort[S: []E, E: ordered](mut s: S) { - n := len(s) - pdqsort(s, 0, n, bits::Len(uint(n))) -} - -// Reports whether x is sorted in ascending order. -fn IsSorted[S: []E, E: ordered](mut s: S): bool { - mut i := len(s) - 1 - for i > 0; i-- { - if cmp::Less(s[i], s[i-1]) { - ret false - } - } - ret true -} - // Reports whether slices are the same length and contains same elements. // The nil slices considered as zero-length slices. // The floating-point NaNs are not considered equal. diff --git a/std/slices/sort.jule b/std/slices/sort.jule index 9add8a2ad..8bcf98af5 100644 --- a/std/slices/sort.jule +++ b/std/slices/sort.jule @@ -3,7 +3,7 @@ // license that can be found in the LICENSE file. // The Jule code is a modified version of the original Go code from -// https://github.com/golang/go/blob/0df681248862a34ff1233cdc4cf0b036e4761652/src/slices/zsortordered.go and came with this notice. 
+// https://github.com/golang/go/blob/0df681248862a34ff1233cdc4cf0b036e4761652/src/slices/sort.go and came with this notice. // // ==================================================== // Copyright (c) 2009 The Go Authors. All rights reserved. @@ -38,342 +38,47 @@ use bits for std::math::bits use cmp for std::internal::cmp -enum sortedHint { - Unknown, - Increasing, - Decreasing, +// Sorts a slice of any ordered type in ascending order. +// When sorting floating-point numbers, NaNs are ordered before other values. +fn Sort[S: []E, E: ordered](mut s: S) { + n := len(s) + pdqsort(s, 0, n, bits::Len(uint(n))) } -// xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf -type xorshift: u64 - -fn xorshiftNext(mut &r: xorshift): u64 { - r ^= r << 13 - r ^= r >> 17 - r ^= r << 5 - ret u64(r) -} - -fn nextPowerOfTwo(length: int): uint { - ret 1 << bits::Len(uint(length)) -} - -// Sorts data[a:b] using insertion sort. -fn insertionSort[E: ordered](mut &data: []E, a: int, b: int) { - mut i := a + 1 - for i < b; i++ { - mut j := i - for j > a && cmp::Less(data[j], data[j-1]); j-- { - data[j], data[j-1] = data[j-1], data[j] - } - } -} - -// Implements the heap property on data[lo:hi]. -// first is an offset into the array where the root of the heap lies. -fn siftDown[E: ordered](mut &data: []E, lo: int, hi: int, first: int) { - mut root := lo - for { - mut child := 2 * root + 1 - if child >= hi { - break - } - if child+1 < hi && cmp::Less(data[first+child], data[first+child+1]) { - child++ - } - if !cmp::Less(data[first+root], data[first+child]) { - ret - } - data[first+root], data[first+child] = data[first+child], data[first+root] - root = child - } -} - -fn heapSort[E: ordered](mut &data: []E, a: int, b: int) { - first := a - lo := 0 - hi := b - a - - // Build heap with greatest element at top. - mut i := (hi - 1) >> 1 - for i >= 0; i-- { - siftDown(data, i, hi, first) - } - - // Pop elements, largest first, into end of data. 
- i = hi - 1 - for i >= 0; i-- { - data[first], data[first+1] = data[first+1], data[first] - siftDown(data, lo, i, first) - } -} - -// breakPatternsOrdered scatters some elements around in an attempt to break some patterns -// that might cause imbalanced partitions in quicksort. -fn breakPatterns[E: ordered](mut &data: []E, a: int, b: int) { - length := b - a - if length >= 8 { - mut random := xorshift(length) - modulus := nextPowerOfTwo(length) - - mut idx := a + (length / 4) * 2 - 1 - for idx <= a+(length / 4)*2+1; idx++ { - mut other := int(uint(xorshiftNext(random)) & (modulus - 1)) - if other >= length { - other -= length - } - data[idx], data[a+other] = data[a+other], data[idx] - } - } -} - -// Returns x,y where data[x] <= data[y], where x,y=a,b or x,y=b,a. -fn order2O[E: ordered](data: []E, a: int, b: int, mut &swaps: int): (int, int) { - if cmp::Less(data[b], data[a]) { - swaps++ - ret b, a - } - ret a, b -} - -// Returns x where data[x] is the median of data[a],data[b],data[c], where x is a, b, or c. -fn median[E: ordered](data: []E, mut a: int, mut b: int, mut c: int, mut &swaps: int): int { - a, b = order2O(data, a, b, swaps) - b, c = order2O(data, b, c, swaps) - a, b = order2O(data, a, b, swaps) - ret b -} - -// Finds the median of data[a - 1], data[a], data[a + 1] and stores the index into a. -fn medianAdjacent[E: ordered](data: []E, a: int, mut &swaps: int): int { - ret median(data, a - 1, a, a + 1, swaps) -} - -// Chooses a pivot in data[a:b]. -// -// [0,8): chooses a static pivot. -// [8,shortestNinther): uses the simple median-of-three method. -// [shortestNinther,∞): uses the Tukey ninther method. 
-fn choosePivot[E: ordered](mut &data: []E, a: int, b: int): (pivot: int, hint: sortedHint) { - const shortestNinther = 50 - const maxSwaps = 4 * 3 - - l := b - a - - mut swaps := 0 - mut i := a + l / 4 * 1 - mut j := a + l / 4 * 2 - mut k := a + l / 4 * 3 - - if l >= 8 { - if l >= shortestNinther { - // Tukey ninther method, the idea came from Rust's implementation. - i = medianAdjacent(data, i, swaps) - j = medianAdjacent(data, j, swaps) - k = medianAdjacent(data, k, swaps) - } - // Find the median among i, j, k and stores it into j. - j = median(data, i, j, k, swaps) - } - - match swaps { - | 0: - ret j, sortedHint.Increasing - | maxSwaps: - ret j, sortedHint.Decreasing - |: - ret j, sortedHint.Unknown - } -} - -fn reverseRange[E: ordered](mut &data: []E, a: int, b: int) { - mut i := a - mut j := b - 1 - for i < j { - data[i], data[j] = data[j], data[i] - i++ - j-- - } -} - -// Partially sorts a slice, returns true if the slice is sorted at the end. -fn partialInsertionSort[E: ordered](mut &data: []E, a: int, b: int): bool { - const maxSteps = 5 // maximum number of adjacent out-of-order pairs that will get shifted - const shortestShifting = 50 // don't shift any elements on short arrays - mut i := a + 1 - mut j := 0 - for j < maxSteps; j++ { - for i < b && !cmp::Less(data[i], data[i-1]) { - i++ - } - - if i == b { - ret true - } - - if b-a < shortestShifting { +// Reports whether x is sorted in ascending order. +fn IsSorted[S: []E, E: ordered](mut s: S): bool { + mut i := len(s) - 1 + for i > 0; i-- { + if cmp::Less(s[i], s[i-1]) { ret false } - - data[i], data[i-1] = data[i-1], data[i] - - // Shift the smaller one to the left. - if i-a >= 2 { - mut z := i - 1 - for z >= 1; j-- { - if !cmp::Less(data[z], data[z-1]) { - break - } - data[z], data[z-1] = data[z-1], data[z] - } - } - // Shift the greater one to the right. 
- if b-i >= 2 { - mut z := i + 1 - for z < b; z++ { - if !cmp::Less(data[z], data[z-1]) { - break - } - data[z], data[z-1] = data[z-1], data[z] - } - } } - ret false + ret true } -// Partitions data[a:b] into elements equal to data[pivot] followed by elements greater than data[pivot]. -// It assumed that data[a:b] does not contain elements smaller than the data[pivot]. -fn partitionEqual[E: ordered](mut &data: []E, a: int, b: int, pivot: int): (newpivot: int) { - data[a], data[pivot] = data[pivot], data[a] - mut i, mut j := a + 1, b - 1 // i and j are inclusive of the elements remaining to be partitioned - - for { - for i <= j && !cmp::Less(data[a], data[i]) { - i++ - } - for i <= j && cmp::Less(data[a], data[j]) { - j-- - } - if i > j { - break - } - data[i], data[j] = data[j], data[i] - i++ - j-- - } - ret i +// Sorts the slice s in ascending order as determined by the cmp +// function. This sort is not guaranteed to be stable. +// cmp(a, b) should return a negative number when a < b, a positive number when +// a > b and zero when a == b. +// +// SortFunc requires that cmp is a strict weak ordering. +// See https://en.wikipedia.org/wiki/Weak_ordering#Strict_weak_orderings. +fn SortFunc[S: []E, E](mut s: S, cmp: fn(a: E, b: E): int) { + n := len(s) + pdqsortFunc(s, 0, n, bits::Len(uint(n)), cmp) } -// Does one quicksort partition. -// let p = data[pivot] -// Moves elements in data[a:b] around, so that data[i]

=p for inewpivot. -// On return, data[newpivot] = p -fn partition[E: ordered](mut &data: []E, a: int, b: int, pivot: int): (newpivot: int, alreadyPartitioned: bool) { - data[a], data[pivot] = data[pivot], data[a] - mut i, mut j := a + 1, b - 1 // i and j are inclusive of the elements remaining to be partitioned - - for i <= j && cmp::Less(data[i], data[a]) { - i++ - } - for i <= j && !cmp::Less(data[j], data[a]) { - j-- - } - if i > j { - data[j], data[a] = data[a], data[j] - ret j, true - } - data[i], data[j] = data[j], data[i] - i++ - j-- - - for { - for i <= j && cmp::Less(data[i], data[a]) { - i++ - } - for i <= j && !cmp::Less(data[j], data[a]) { - j-- - } - if i > j { - break - } - data[i], data[j] = data[j], data[i] - i++ - j-- - } - data[j], data[a] = data[a], data[j] - ret j, false +fn nextPowerOfTwo(length: int): uint { + shift := uint(bits::Len(uint(length))) + ret uint(1 << shift) } -// Sorts data[a:b]. -// The algorithm based on pattern-defeating quicksort(pdqsort), but without the optimizations from BlockQuicksort. -// pdqsort paper: https://arxiv.org/pdf/2106.05123.pdf -// C++ implementation: https://github.com/orlp/pdqsort -// Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/ -// limit is the number of allowed bad (very unbalanced) pivots before falling back to heapsort. -fn pdqsort[E: ordered](mut &data: []E, mut a: int, mut b: int, mut limit: int) { - const maxInsertion = 12 - - mut wasBalanced := true // whether the last partitioning was reasonably balanced - mut wasPartitioned := true // whether the slice was already partitioned - - for { - length := b - a - - if length <= maxInsertion { - insertionSort(data, a, b) - ret - } - - // Fall back to heapsort if too many bad choices were made. - if limit == 0 { - heapSort(data, a, b) - ret - } - - // If the last partitioning was imbalanced, we need to breaking patterns. 
- if !wasBalanced { - breakPatterns(data, a, b) - limit-- - } - - mut pivot, mut hint := choosePivot(data, a, b) - if hint == sortedHint.Decreasing { - reverseRange(data, a, b) - // The chosen pivot was pivot-a elements after the start of the array. - // After reversing it is pivot-a elements before the end of the array. - // The idea came from Rust's implementation. - pivot = (b - 1) - (pivot - a) - hint = sortedHint.Increasing - } - - // The slice is likely already sorted. - if wasBalanced && wasPartitioned && hint == sortedHint.Increasing { - if partialInsertionSort(data, a, b) { - ret - } - } - - // Probably the slice contains many duplicate elements, partition the slice into - // elements equal to and elements greater than the pivot. - if a > 0 && !cmp::Less(data[a-1], data[pivot]) { - mid := partitionEqual(data, a, b, pivot) - a = mid - continue - } - - mid, alreadyPartitioned := partition(data, a, b, pivot) - wasPartitioned = alreadyPartitioned +// xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf +type xorshift: u64 - leftLen, rightLen := mid - a, b - mid - balanceThreshold := length / 8 - if leftLen < rightLen { - wasBalanced = leftLen >= balanceThreshold - pdqsort(data, a, mid, limit) - a = mid + 1 - } else { - wasBalanced = rightLen >= balanceThreshold - pdqsort(data, mid + 1, b, limit) - b = mid - } - } +fn xorshiftNext(mut &r: xorshift): u64 { + r ^= r << 13 + r ^= r >> 17 + r ^= r << 5 + ret u64(r) } \ No newline at end of file diff --git a/std/slices/sort_test.jule b/std/slices/sort_test.jule index c77dd4a6b..a444e7285 100644 --- a/std/slices/sort_test.jule +++ b/std/slices/sort_test.jule @@ -54,4 +54,45 @@ fn testStrs(t: &T) { t.Errorf("sorted: {}", caseStrs) t.Errorf(" got: {}", case) } +} + +#test +fn testIntsFunc(t: &T) { + mut case := cloneSlice(caseInts) + SortFunc(case, fn(a: int, b: int): int { ret compare(a, b) }) + if !IsSorted(case) { + t.Errorf("sorted: {}", caseInts) + t.Errorf(" got: {}", case) + } +} + +#test 
+fn testF64sFunc(t: &T) { + mut case := cloneSlice(caseF64s) + SortFunc(case, fn(a: f64, b: f64): int { ret compare(a, b) }) + if !IsSorted(case) { + t.Errorf("sorted: {}", caseF64s) + t.Errorf(" got: {}", case) + } +} + +#test +fn testStrsFunc(t: &T) { + mut case := cloneSlice(caseStrs) + SortFunc(case, fn(a: str, b: str): int { ret compare(a, b) }) + if !IsSorted(case) { + t.Errorf("sorted: {}", caseStrs) + t.Errorf(" got: {}", case) + } +} + +fn compare[T](a: T, b: T): int { + match { + | a < b: + ret -1 + | a > b: + ret +1 + |: + ret 0 + } } \ No newline at end of file diff --git a/std/slices/sortfunc.jule b/std/slices/sortfunc.jule new file mode 100644 index 000000000..fc4307d92 --- /dev/null +++ b/std/slices/sortfunc.jule @@ -0,0 +1,357 @@ +// Copyright 2024 The Jule Programming Language. +// Use of this source code is governed by a BSD 3-Clause +// license that can be found in the LICENSE file. + +// The Jule code is a modified version of the original Go code from +// https://github.com/golang/go/blob/0df681248862a34ff1233cdc4cf0b036e4761652/src/slices/zsortanyfunc.go and came with this notice. +// +// ==================================================== +// Copyright (c) 2009 The Go Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// ==================================================== + +// Sorts data[a:b] using insertion sort. +fn insertionSortFunc[S: []E, E](mut data: S, a: int, b: int, cmp: fn(a: E, b: E): int) { + mut i := a + 1 + for i < b; i++ { + mut j := i + for j > a && (cmp(data[j], data[j-1]) < 0); j-- { + data[j], data[j-1] = data[j-1], data[j] + } + } +} + +// Implements the heap property on data[lo:hi]. +// first is an offset into the array where the root of the heap lies. +fn siftDownFunc[S: []E, E](mut data: S, lo: int, hi: int, first: int, cmp: fn(a: E, b: E): int) { + mut root := lo + for { + mut child := 2 * root + 1 + if child >= hi { + break + } + if child+1 < hi && (cmp(data[first+child], data[first+child+1]) < 0) { + child++ + } + if !(cmp(data[first+root], data[first+child]) < 0) { + ret + } + data[first+root], data[first+child] = data[first+child], data[first+root] + root = child + } +} + +fn heapSortFunc[S: []E, E](mut data: S, a: int, b: int, cmp: fn(a: E, b: E): int) { + first := a + lo := 0 + hi := b - a + + // Build heap with greatest element at top. 
+ mut i := (hi - 1) >> 1 + for i >= 0; i-- { + siftDownFunc(data, i, hi, first, cmp) + } + + // Pop elements, largest first, into end of data. + i = hi - 1 + for i >= 0; i-- { + data[first], data[first+i] = data[first+i], data[first] + siftDownFunc(data, lo, i, first, cmp) + } +} + +// Scatters some elements around in an attempt to break some patterns +// that might cause imbalanced partitions in quicksort. +fn breakPatternsFunc[S: []E, E](mut data: S, a: int, b: int, cmp: fn(a: E, b: E): int) { + length := b - a + if length >= 8 { + mut random := xorshift(length) + modulus := nextPowerOfTwo(length) + + mut idx := a + (length >> 2) << 1 - 1 + for idx <= a+(length >> 2)<<1+1; idx++ { + mut other := int(uint(xorshiftNext(random)) & (modulus - 1)) + if other >= length { + other -= length + } + data[idx], data[a+other] = data[a+other], data[idx] + } + } +} + +// Returns x,y where data[x] <= data[y], where x,y=a,b or x,y=b,a. +fn order2Func[S: []E, E](data: S, a: int, b: int, mut &swaps: int, cmp: fn(a: E, b: E): int): (int, int) { + if cmp(data[b], data[a]) < 0 { + swaps++ + ret b, a + } + ret a, b +} + +// Returns x where data[x] is the median of data[a],data[b],data[c], where x is a, b, or c. +fn medianFunc[S: []E, E](data: S, mut a: int, mut b: int, mut c: int, mut &swaps: int, cmp: fn(a: E, b: E): int): int { + a, b = order2Func(data, a, b, swaps, cmp) + b, c = order2Func(data, b, c, swaps, cmp) + a, b = order2Func(data, a, b, swaps, cmp) + ret b +} + +// Finds the median of data[a - 1], data[a], data[a + 1] and stores the index into a. +fn medianAdjacentFunc[S: []E, E](mut data: S, a: int, mut &swaps: int, cmp: fn(a: E, b: E): int): int { + ret medianFunc(data, a - 1, a, a + 1, swaps, cmp) +} + +// Chooses a pivot in data[a:b]. +// +// [0,8): chooses a static pivot. +// [8,shortestNinther): uses the simple median-of-three method. +// [shortestNinther,∞): uses the Tukey ninther method. 
+fn choosePivotFunc[S: []E, E](mut data: S, a: int, b: int, cmp: fn(a: E, b: E): int): (pivot: int, hint: sortedHint) {
+    const shortestNinther = 50
+    const maxSwaps = 4 * 3
+
+    l := b - a
+
+    mut swaps := 0
+    mut i := a + (l >> 2) * 1
+    mut j := a + (l >> 2) * 2
+    mut k := a + (l >> 2) * 3
+
+    if l >= 8 {
+        if l >= shortestNinther {
+            // Tukey ninther method, the idea came from Rust's implementation.
+            i = medianAdjacentFunc(data, i, swaps, cmp)
+            j = medianAdjacentFunc(data, j, swaps, cmp)
+            k = medianAdjacentFunc(data, k, swaps, cmp)
+        }
+        // Find the median among i, j, k and store it into j.
+        j = medianFunc(data, i, j, k, swaps, cmp)
+    }
+
+    match swaps {
+    | 0:
+        ret j, sortedHint.Increasing
+    | maxSwaps:
+        ret j, sortedHint.Decreasing
+    |:
+        ret j, sortedHint.Unknown
+    }
+}
+
+fn reverseRangeFunc[S: []E, E](mut data: S, a: int, b: int, cmp: fn(a: E, b: E): int) {
+    mut i := a
+    mut j := b - 1
+    for i < j {
+        data[i], data[j] = data[j], data[i]
+        i++
+        j--
+    }
+}
+
+// Partially sorts a slice, returns true if the slice is sorted at the end.
+fn partialInsertionSortFunc[S: []E, E](mut data: S, a: int, b: int, cmp: fn(a: E, b: E): int): bool {
+    const maxSteps = 5         // maximum number of adjacent out-of-order pairs that will get shifted
+    const shortestShifting = 50 // don't shift any elements on short arrays
+
+    mut i := a + 1
+    mut j := 0 // step counter; the shift loops below use their own cursor z
+    for j < maxSteps; j++ {
+        for i < b && !(cmp(data[i], data[i-1]) < 0) {
+            i++
+        }
+
+        if i == b {
+            ret true
+        }
+
+        if b-a < shortestShifting {
+            ret false
+        }
+
+        data[i], data[i-1] = data[i-1], data[i]
+
+        // Shift the smaller one to the left.
+        if i-a >= 2 {
+            mut z := i - 1
+            for z >= 1; z-- {
+                if !(cmp(data[z], data[z-1]) < 0) {
+                    break
+                }
+                data[z], data[z-1] = data[z-1], data[z]
+            }
+        }
+        // Shift the greater one to the right.
+        if b-i >= 2 {
+            mut z := i + 1
+            for z < b; z++ {
+                if !(cmp(data[z], data[z-1]) < 0) {
+                    break
+                }
+                data[z], data[z-1] = data[z-1], data[z]
+            }
+        }
+    }
+    ret false
+}
+
+// Partitions data[a:b] into elements equal to data[pivot] followed by elements greater than data[pivot].
+// It is assumed that data[a:b] does not contain elements smaller than the data[pivot].
+fn partitionEqualFunc[S: []E, E](mut data: S, a: int, b: int, pivot: int, cmp: fn(a: E, b: E): int): (newpivot: int) {
+    data[a], data[pivot] = data[pivot], data[a]
+    mut i, mut j := a + 1, b - 1 // i and j are inclusive of the elements remaining to be partitioned
+
+    for {
+        for i <= j && !(cmp(data[a], data[i]) < 0) {
+            i++
+        }
+        for i <= j && (cmp(data[a], data[j]) < 0) {
+            j--
+        }
+        if i > j {
+            break
+        }
+        data[i], data[j] = data[j], data[i]
+        i++
+        j--
+    }
+    ret i
+}
+
+// Does one quicksort partition.
+// Let p = data[pivot]
+// Moves elements in data[a:b] around, so that data[i]<p and data[j]>=p for i<newpivot and j>newpivot.
+// On return, data[newpivot] = p
+fn partitionFunc[S: []E, E](mut data: S, a: int, b: int, pivot: int, cmp: fn(a: E, b: E): int): (newpivot: int, alreadyPartitioned: bool) {
+    data[a], data[pivot] = data[pivot], data[a]
+    mut i, mut j := a + 1, b - 1 // i and j are inclusive of the elements remaining to be partitioned
+
+    for i <= j && (cmp(data[i], data[a]) < 0) {
+        i++
+    }
+    for i <= j && !(cmp(data[j], data[a]) < 0) {
+        j--
+    }
+    if i > j {
+        data[j], data[a] = data[a], data[j]
+        ret j, true
+    }
+    data[i], data[j] = data[j], data[i]
+    i++
+    j--
+
+    for {
+        for i <= j && (cmp(data[i], data[a]) < 0) {
+            i++
+        }
+        for i <= j && !(cmp(data[j], data[a]) < 0) {
+            j--
+        }
+        if i > j {
+            break
+        }
+        data[i], data[j] = data[j], data[i]
+        i++
+        j--
+    }
+    data[j], data[a] = data[a], data[j]
+    ret j, false
+}
+
+// Sorts data[a:b].
+// The algorithm based on pattern-defeating quicksort(pdqsort), but without the optimizations from BlockQuicksort.
+// pdqsort paper: https://arxiv.org/pdf/2106.05123.pdf
+// C++ implementation: https://github.com/orlp/pdqsort
+// Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/
+// limit is the number of allowed bad (very unbalanced) pivots before falling back to heapsort.
+fn pdqsortFunc[S: []E, E](mut data: S, mut a: int, mut b: int, mut limit: int, cmp: fn(a: E, b: E): int) {
+    const maxInsertion = 12
+
+    mut wasBalanced := true    // whether the last partitioning was reasonably balanced
+    mut wasPartitioned := true // whether the slice was already partitioned
+
+    for {
+        length := b - a
+
+        if length <= maxInsertion {
+            insertionSortFunc(data, a, b, cmp)
+            ret
+        }
+
+        // Fall back to heapsort if too many bad choices were made.
+        if limit == 0 {
+            heapSortFunc(data, a, b, cmp)
+            ret
+        }
+
+        // If the last partitioning was imbalanced, we need to break patterns.
+        if !wasBalanced {
+            breakPatternsFunc(data, a, b, cmp)
+            limit--
+        }
+
+        mut pivot, mut hint := choosePivotFunc(data, a, b, cmp)
+        if hint == sortedHint.Decreasing {
+            reverseRangeFunc(data, a, b, cmp)
+            // The chosen pivot was pivot-a elements after the start of the array.
+            // After reversing it is pivot-a elements before the end of the array.
+            // The idea came from Rust's implementation.
+            pivot = (b - 1) - (pivot - a)
+            hint = sortedHint.Increasing
+        }
+
+        // The slice is likely already sorted.
+        if wasBalanced && wasPartitioned && hint == sortedHint.Increasing {
+            if partialInsertionSortFunc(data, a, b, cmp) {
+                ret
+            }
+        }
+
+        // Probably the slice contains many duplicate elements, partition the slice into
+        // elements equal to and elements greater than the pivot.
+        if a > 0 && !(cmp(data[a-1], data[pivot]) < 0) {
+            mid := partitionEqualFunc(data, a, b, pivot, cmp)
+            a = mid
+            continue
+        }
+
+        mid, alreadyPartitioned := partitionFunc(data, a, b, pivot, cmp)
+        wasPartitioned = alreadyPartitioned
+
+        leftLen, rightLen := mid - a, b - mid
+        balanceThreshold := length >> 3
+        if leftLen < rightLen {
+            wasBalanced = leftLen >= balanceThreshold
+            pdqsortFunc(data, a, mid, limit, cmp)
+            a = mid + 1
+        } else {
+            wasBalanced = rightLen >= balanceThreshold
+            pdqsortFunc(data, mid + 1, b, limit, cmp)
+            b = mid
+        }
+    }
+}
\ No newline at end of file
diff --git a/std/slices/sortordered.jule b/std/slices/sortordered.jule
new file mode 100644
index 000000000..8011a6e74
--- /dev/null
+++ b/std/slices/sortordered.jule
@@ -0,0 +1,365 @@
+// Copyright 2024 The Jule Programming Language.
+// Use of this source code is governed by a BSD 3-Clause
+// license that can be found in the LICENSE file.
+
+// The Jule code is a modified version of the original Go code from
+// https://github.com/golang/go/blob/0df681248862a34ff1233cdc4cf0b036e4761652/src/slices/zsortordered.go and came with this notice.
+// +// ==================================================== +// Copyright (c) 2009 The Go Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// ==================================================== + +use bits for std::math::bits +use cmp for std::internal::cmp + +enum sortedHint { + Unknown, + Increasing, + Decreasing, +} + +// Sorts data[a:b] using insertion sort. 
+fn insertionSort[E: ordered](mut &data: []E, a: int, b: int) {
+    mut i := a + 1
+    for i < b; i++ {
+        mut j := i
+        for j > a && cmp::Less(data[j], data[j-1]); j-- {
+            data[j], data[j-1] = data[j-1], data[j]
+        }
+    }
+}
+
+// Implements the heap property on data[lo:hi].
+// first is an offset into the array where the root of the heap lies.
+fn siftDown[E: ordered](mut &data: []E, lo: int, hi: int, first: int) {
+    mut root := lo
+    for {
+        mut child := 2 * root + 1
+        if child >= hi {
+            break
+        }
+        if child+1 < hi && cmp::Less(data[first+child], data[first+child+1]) {
+            child++
+        }
+        if !cmp::Less(data[first+root], data[first+child]) {
+            ret
+        }
+        data[first+root], data[first+child] = data[first+child], data[first+root]
+        root = child
+    }
+}
+
+fn heapSort[E: ordered](mut &data: []E, a: int, b: int) {
+    first := a
+    lo := 0
+    hi := b - a
+
+    // Build heap with greatest element at top.
+    mut i := (hi - 1) >> 1
+    for i >= 0; i-- {
+        siftDown(data, i, hi, first)
+    }
+
+    // Pop elements, largest first, into end of data.
+    i = hi - 1
+    for i >= 0; i-- {
+        data[first], data[first+i] = data[first+i], data[first] // swap the heap root with slot i, then re-heapify [lo, i)
+        siftDown(data, lo, i, first)
+    }
+}
+
+// Scatters some elements around in an attempt to break some patterns
+// that might cause imbalanced partitions in quicksort.
+fn breakPatterns[E: ordered](mut &data: []E, a: int, b: int) {
+    length := b - a
+    if length >= 8 {
+        mut random := xorshift(length)
+        modulus := nextPowerOfTwo(length)
+
+        mut idx := a + (length / 4) * 2 - 1
+        for idx <= a+(length / 4)*2+1; idx++ {
+            mut other := int(uint(xorshiftNext(random)) & (modulus - 1))
+            if other >= length {
+                other -= length
+            }
+            data[idx], data[a+other] = data[a+other], data[idx]
+        }
+    }
+}
+
+// Returns x,y where data[x] <= data[y], where x,y=a,b or x,y=b,a.
+fn order2O[E: ordered](data: []E, a: int, b: int, mut &swaps: int): (int, int) { + if cmp::Less(data[b], data[a]) { + swaps++ + ret b, a + } + ret a, b +} + +// Returns x where data[x] is the median of data[a],data[b],data[c], where x is a, b, or c. +fn median[E: ordered](data: []E, mut a: int, mut b: int, mut c: int, mut &swaps: int): int { + a, b = order2O(data, a, b, swaps) + b, c = order2O(data, b, c, swaps) + a, b = order2O(data, a, b, swaps) + ret b +} + +// Finds the median of data[a - 1], data[a], data[a + 1] and stores the index into a. +fn medianAdjacent[E: ordered](data: []E, a: int, mut &swaps: int): int { + ret median(data, a - 1, a, a + 1, swaps) +} + +// Chooses a pivot in data[a:b]. +// +// [0,8): chooses a static pivot. +// [8,shortestNinther): uses the simple median-of-three method. +// [shortestNinther,∞): uses the Tukey ninther method. +fn choosePivot[E: ordered](mut &data: []E, a: int, b: int): (pivot: int, hint: sortedHint) { + const shortestNinther = 50 + const maxSwaps = 4 * 3 + + l := b - a + + mut swaps := 0 + mut i := a + l / 4 * 1 + mut j := a + l / 4 * 2 + mut k := a + l / 4 * 3 + + if l >= 8 { + if l >= shortestNinther { + // Tukey ninther method, the idea came from Rust's implementation. + i = medianAdjacent(data, i, swaps) + j = medianAdjacent(data, j, swaps) + k = medianAdjacent(data, k, swaps) + } + // Find the median among i, j, k and stores it into j. + j = median(data, i, j, k, swaps) + } + + match swaps { + | 0: + ret j, sortedHint.Increasing + | maxSwaps: + ret j, sortedHint.Decreasing + |: + ret j, sortedHint.Unknown + } +} + +fn reverseRange[E: ordered](mut &data: []E, a: int, b: int) { + mut i := a + mut j := b - 1 + for i < j { + data[i], data[j] = data[j], data[i] + i++ + j-- + } +} + +// Partially sorts a slice, returns true if the slice is sorted at the end. 
+fn partialInsertionSort[E: ordered](mut &data: []E, a: int, b: int): bool {
+    const maxSteps = 5         // maximum number of adjacent out-of-order pairs that will get shifted
+    const shortestShifting = 50 // don't shift any elements on short arrays
+    mut i := a + 1
+    mut j := 0 // step counter; the shift loops below use their own cursor z
+    for j < maxSteps; j++ {
+        for i < b && !cmp::Less(data[i], data[i-1]) {
+            i++
+        }
+
+        if i == b {
+            ret true
+        }
+
+        if b-a < shortestShifting {
+            ret false
+        }
+
+        data[i], data[i-1] = data[i-1], data[i]
+
+        // Shift the smaller one to the left.
+        if i-a >= 2 {
+            mut z := i - 1
+            for z >= 1; z-- {
+                if !cmp::Less(data[z], data[z-1]) {
+                    break
+                }
+                data[z], data[z-1] = data[z-1], data[z]
+            }
+        }
+        // Shift the greater one to the right.
+        if b-i >= 2 {
+            mut z := i + 1
+            for z < b; z++ {
+                if !cmp::Less(data[z], data[z-1]) {
+                    break
+                }
+                data[z], data[z-1] = data[z-1], data[z]
+            }
+        }
+    }
+    ret false
+}
+
+// Partitions data[a:b] into elements equal to data[pivot] followed by elements greater than data[pivot].
+// It is assumed that data[a:b] does not contain elements smaller than the data[pivot].
+fn partitionEqual[E: ordered](mut &data: []E, a: int, b: int, pivot: int): (newpivot: int) {
+    data[a], data[pivot] = data[pivot], data[a]
+    mut i, mut j := a + 1, b - 1 // i and j are inclusive of the elements remaining to be partitioned
+
+    for {
+        for i <= j && !cmp::Less(data[a], data[i]) {
+            i++
+        }
+        for i <= j && cmp::Less(data[a], data[j]) {
+            j--
+        }
+        if i > j {
+            break
+        }
+        data[i], data[j] = data[j], data[i]
+        i++
+        j--
+    }
+    ret i
+}
+
+// Does one quicksort partition.
+// Let p = data[pivot]
+// Moves elements in data[a:b] around, so that data[i]<p and data[j]>=p for i<newpivot and j>newpivot.
+// On return, data[newpivot] = p
+fn partition[E: ordered](mut &data: []E, a: int, b: int, pivot: int): (newpivot: int, alreadyPartitioned: bool) {
+    data[a], data[pivot] = data[pivot], data[a]
+    mut i, mut j := a + 1, b - 1 // i and j are inclusive of the elements remaining to be partitioned
+
+    for i <= j && cmp::Less(data[i], data[a]) {
+        i++
+    }
+    for i <= j && !cmp::Less(data[j], data[a]) {
+        j--
+    }
+    if i > j {
+        data[j], data[a] = data[a], data[j]
+        ret j, true
+    }
+    data[i], data[j] = data[j], data[i]
+    i++
+    j--
+
+    for {
+        for i <= j && cmp::Less(data[i], data[a]) {
+            i++
+        }
+        for i <= j && !cmp::Less(data[j], data[a]) {
+            j--
+        }
+        if i > j {
+            break
+        }
+        data[i], data[j] = data[j], data[i]
+        i++
+        j--
+    }
+    data[j], data[a] = data[a], data[j]
+    ret j, false
+}
+
+// Sorts data[a:b].
+// The algorithm based on pattern-defeating quicksort(pdqsort), but without the optimizations from BlockQuicksort.
+// pdqsort paper: https://arxiv.org/pdf/2106.05123.pdf
+// C++ implementation: https://github.com/orlp/pdqsort
+// Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/
+// limit is the number of allowed bad (very unbalanced) pivots before falling back to heapsort.
+fn pdqsort[E: ordered](mut &data: []E, mut a: int, mut b: int, mut limit: int) {
+    const maxInsertion = 12
+
+    mut wasBalanced := true    // whether the last partitioning was reasonably balanced
+    mut wasPartitioned := true // whether the slice was already partitioned
+
+    for {
+        length := b - a
+
+        if length <= maxInsertion {
+            insertionSort(data, a, b)
+            ret
+        }
+
+        // Fall back to heapsort if too many bad choices were made.
+        if limit == 0 {
+            heapSort(data, a, b)
+            ret
+        }
+
+        // If the last partitioning was imbalanced, we need to break patterns.
+        if !wasBalanced {
+            breakPatterns(data, a, b)
+            limit--
+        }
+
+        mut pivot, mut hint := choosePivot(data, a, b)
+        if hint == sortedHint.Decreasing {
+            reverseRange(data, a, b)
+            // The chosen pivot was pivot-a elements after the start of the array.
+            // After reversing it is pivot-a elements before the end of the array.
+            // The idea came from Rust's implementation.
+            pivot = (b - 1) - (pivot - a)
+            hint = sortedHint.Increasing
+        }
+
+        // The slice is likely already sorted.
+        if wasBalanced && wasPartitioned && hint == sortedHint.Increasing {
+            if partialInsertionSort(data, a, b) {
+                ret
+            }
+        }
+
+        // Probably the slice contains many duplicate elements, partition the slice into
+        // elements equal to and elements greater than the pivot.
+        if a > 0 && !cmp::Less(data[a-1], data[pivot]) {
+            mid := partitionEqual(data, a, b, pivot)
+            a = mid
+            continue
+        }
+
+        mid, alreadyPartitioned := partition(data, a, b, pivot)
+        wasPartitioned = alreadyPartitioned
+
+        leftLen, rightLen := mid - a, b - mid
+        balanceThreshold := length / 8
+        if leftLen < rightLen {
+            wasBalanced = leftLen >= balanceThreshold
+            pdqsort(data, a, mid, limit)
+            a = mid + 1
+        } else {
+            wasBalanced = rightLen >= balanceThreshold
+            pdqsort(data, mid + 1, b, limit)
+            b = mid
+        }
+    }
+}
\ No newline at end of file