From 9118a80d4f568ac2c915b4cfee03b92161f563f1 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 14 Feb 2024 17:12:23 +0100 Subject: [PATCH 01/10] Rename _clang-format to .clang-format --- _clang-format => .clang-format | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename _clang-format => .clang-format (100%) diff --git a/_clang-format b/.clang-format similarity index 100% rename from _clang-format rename to .clang-format From 196f0c7d063b8540217397efac4a6133cb3f19e8 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 14 Feb 2024 17:37:21 +0100 Subject: [PATCH 02/10] Update c-cpp.yml --- .github/workflows/c-cpp.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 8d7ade2b..c32e4ba0 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -7,6 +7,22 @@ on: branches: [ "main" ] jobs: + clang-format: + + runs-on: intel-ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Install dependencies + run: | + sudo apt update + sudo apt -y install clang-format + + - name: Linting + run: | + find . -type | grep -P ".*\.(c|cpp|h|hpp)" | xargs clang-format -style=file --dry-run + SKL-gcc9: runs-on: intel-ubuntu-latest From 59e6dfb6384b779e9b7da7b60f6d41505ccd75cb Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 14 Feb 2024 17:48:03 +0100 Subject: [PATCH 03/10] Update c-cpp.yml --- .github/workflows/c-cpp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index c32e4ba0..f35589b6 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -21,7 +21,7 @@ jobs: - name: Linting run: | - find . -type | grep -P ".*\.(c|cpp|h|hpp)" | xargs clang-format -style=file --dry-run + find . -type f | grep -P ".*\.(c|cpp|h|hpp)" | xargs clang-format -style=file --dry-run SKL-gcc9: From 4ca12d163bc74517181f104adee82f35dab3e1a0 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 14 Feb 2024 17:55:18 +0100 Subject: [PATCH 04/10] Update c-cpp.yml --- .github/workflows/c-cpp.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index f35589b6..93f81225 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -21,7 +21,8 @@ jobs: - name: Linting run: | - find . -type f | grep -P ".*\.(c|cpp|h|hpp)" | xargs clang-format -style=file --dry-run + pwd + find . -type f | grep -P ".*\.(c|cpp|h|hpp)" | xargs clang-format -style=file --dry-run -Werror SKL-gcc9: From f9eb017ef9134ca240dbace0b809a060b16ee931 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 14 Feb 2024 18:15:46 +0100 Subject: [PATCH 05/10] Update c-cpp.yml --- .github/workflows/c-cpp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 93f81225..769cce36 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -22,7 +22,7 @@ jobs: - name: Linting run: | pwd - find . -type f | grep -P ".*\.(c|cpp|h|hpp)" | xargs clang-format -style=file --dry-run -Werror + find . -type f | grep -P ".*\.(c|cpp|h|hpp)\b" | xargs clang-format -style=file --dry-run -Werror SKL-gcc9: From 9b2e38f2d8c6c53622671fb4aeecb63614471b3b Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 14 Feb 2024 18:21:50 +0100 Subject: [PATCH 06/10] Update c-cpp.yml --- .github/workflows/c-cpp.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 769cce36..871b4d50 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -21,7 +21,6 @@ jobs: - name: Linting run: | - pwd find . -type f | grep -P ".*\.(c|cpp|h|hpp)\b" | xargs clang-format -style=file --dry-run -Werror SKL-gcc9: From 2ca55e7f43dc8b0b3df17e1b868f052a59a4cd7f Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 14 Feb 2024 18:50:17 +0100 Subject: [PATCH 07/10] Create linting.yaml --- .github/workflows/linting.yaml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 .github/workflows/linting.yaml diff --git a/.github/workflows/linting.yaml b/.github/workflows/linting.yaml new file mode 100644 index 00000000..571fdd42 --- /dev/null +++ b/.github/workflows/linting.yaml @@ -0,0 +1,24 @@ +name: Lint + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +jobs: + clang-format: + + runs-on: intel-ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Install dependencies + run: | + sudo apt update + sudo apt -y install clang-format + + - name: Lint + run: | + find . -type f | grep -P ".*\.(c|cpp|h|hpp)\b" | xargs clang-format -style=file --dry-run -Werror From e12e425e8c239403d8e2f6ff55e7a04238849b99 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 14 Feb 2024 18:50:35 +0100 Subject: [PATCH 08/10] Rename linting.yaml to linting.yml --- .github/workflows/{linting.yaml => linting.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{linting.yaml => linting.yml} (100%) diff --git a/.github/workflows/linting.yaml b/.github/workflows/linting.yml similarity index 100% rename from .github/workflows/linting.yaml rename to .github/workflows/linting.yml From f9877b43c2fe579fde438130ff083a69cd8375f2 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 14 Feb 2024 18:50:53 +0100 Subject: [PATCH 09/10] Update c-cpp.yml --- .github/workflows/c-cpp.yml | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 871b4d50..8d7ade2b 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -7,22 +7,6 @@ on: branches: [ "main" ] jobs: - clang-format: - - runs-on: intel-ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - - name: Install dependencies - run: | - sudo apt update - sudo apt -y install clang-format - - - name: Linting - run: | - find . -type f | grep -P ".*\.(c|cpp|h|hpp)\b" | xargs clang-format -style=file --dry-run -Werror - SKL-gcc9: runs-on: intel-ubuntu-latest From c1559a3146d7ddb4d6b47863dda097670502a487 Mon Sep 17 00:00:00 2001 From: icfaust Date: Wed, 14 Feb 2024 12:04:28 -0800 Subject: [PATCH 10/10] apply clang-format --- benchmarks/bench-ipp.cpp | 9 ++++-- benchmarks/bench-objsort.hpp | 17 +++++----- benchmarks/bench-qsort.hpp | 3 +- benchmarks/bench-vqsort.cpp | 3 +- benchmarks/bench.h | 15 ++++----- examples/avx2-32bit-qsort.cpp | 3 +- examples/avx512-16bit-qsort.cpp | 3 +- examples/avx512-32bit-qsort.cpp | 3 +- examples/avx512-64bit-qsort.cpp | 3 +- examples/avx512-argsort.cpp | 3 +- examples/avx512-kv.cpp | 3 +- examples/avx512fp-16bit-qsort.cpp | 3 +- lib/x86simdsort-avx2.cpp | 2 +- lib/x86simdsort-internal.h | 9 ++++-- lib/x86simdsort-scalar.h | 31 +++++++++--------- lib/x86simdsort-skx.cpp | 15 +++++---- lib/x86simdsort.cpp | 36 ++++++++++++--------- lib/x86simdsort.h | 13 +++----- src/avx512-16bit-qsort.hpp | 5 +-- src/xss-common-qsort.h | 3 +- src/xss-network-keyvaluesort.hpp | 10 +++--- tests/test-qsort.cpp | 3 +- utils/custom-compare.h | 6 ++-- utils/custom-float.h | 5 ++- utils/rand_array.h | 52 ++++++++++++++----------------- 25 files changed, 130 insertions(+), 128 deletions(-) diff --git a/benchmarks/bench-ipp.cpp b/benchmarks/bench-ipp.cpp index d9d2abe1..2a90749d 100644 --- a/benchmarks/bench-ipp.cpp +++ b/benchmarks/bench-ipp.cpp @@ -63,13 +63,16 @@ static void ippargsort(benchmark::State &state, Args &&...args) // benchmark for (auto _ : state) { if constexpr (std::is_same_v) { - ippsSortRadixIndexAscend_32f(arr.data(), 4, arg.data(), arrsize, temp); + ippsSortRadixIndexAscend_32f( + arr.data(), 4, arg.data(), arrsize, temp); } else if constexpr (std::is_same_v) { - ippsSortRadixIndexAscend_64f(arr.data(), 8, arg.data(), arrsize, temp); + ippsSortRadixIndexAscend_64f( + arr.data(), 8, arg.data(), arrsize, temp); } else if constexpr (std::is_same_v) { - ippsSortRadixIndexAscend_32s(arr.data(), 4, arg.data(), arrsize, temp); + ippsSortRadixIndexAscend_32s( + arr.data(), 4, arg.data(), arrsize, temp); } state.PauseTiming(); arr = arr_bkp; diff --git a/benchmarks/bench-objsort.hpp b/benchmarks/bench-objsort.hpp index 10dbd043..4f5719a8 100644 --- a/benchmarks/bench-objsort.hpp +++ b/benchmarks/bench-objsort.hpp @@ -5,7 +5,7 @@ static constexpr char euclidean[] = "euclidean"; static constexpr char taxicab[] = "taxicab"; static constexpr char chebyshev[] = "chebyshev"; -template +template struct Point3D { T x; T y; @@ -19,9 +19,7 @@ struct Point3D { } T distance() { - if constexpr (name == "x") { - return x; - } + if constexpr (name == "x") { return x; } else if constexpr (name == "euclidean") { return std::sqrt(x * x + y * y + z * z); } @@ -77,9 +75,8 @@ static void simdobjsort(benchmark::State &state) std::vector arr_bkp = arr; // benchmark for (auto _ : state) { - x86simdsort::object_qsort(arr.data(), arr.size(), [](T p) { - return p.distance(); - }); + x86simdsort::object_qsort( + arr.data(), arr.size(), [](T p) { return p.distance(); }); state.PauseTiming(); if (!std::is_sorted(arr.begin(), arr.end(), less_than_key())) { std::cout << "sorting failed \n"; @@ -90,7 +87,7 @@ static void simdobjsort(benchmark::State &state) } #define BENCHMARK_OBJSORT(func, T, type, dist) \ - BENCHMARK_TEMPLATE(func, T) \ + BENCHMARK_TEMPLATE(func, T) \ ->Arg(10e1) \ ->Arg(10e2) \ ->Arg(10e3) \ @@ -101,12 +98,12 @@ static void simdobjsort(benchmark::State &state) #define BENCH_ALL(dtype) \ BENCHMARK_OBJSORT(simdobjsort, Point3D, dtype, x) \ BENCHMARK_OBJSORT(scalarobjsort, Point3D, dtype, x) \ - BENCHMARK_OBJSORT(simdobjsort, Point3D, dtype, taxicab ) \ + BENCHMARK_OBJSORT(simdobjsort, Point3D, dtype, taxicab) \ BENCHMARK_OBJSORT(scalarobjsort, Point3D, dtype, taxicab) \ BENCHMARK_OBJSORT(simdobjsort, Point3D, dtype, euclidean) \ BENCHMARK_OBJSORT(scalarobjsort, Point3D, dtype, euclidean) \ BENCHMARK_OBJSORT(simdobjsort, Point3D, dtype, chebyshev) \ - BENCHMARK_OBJSORT(scalarobjsort, Point3D, dtype, chebyshev) \ + BENCHMARK_OBJSORT(scalarobjsort, Point3D, dtype, chebyshev) BENCH_ALL(double) BENCH_ALL(float) diff --git a/benchmarks/bench-qsort.hpp b/benchmarks/bench-qsort.hpp index 4d974929..f95b05ba 100644 --- a/benchmarks/bench-qsort.hpp +++ b/benchmarks/bench-qsort.hpp @@ -36,10 +36,9 @@ static void simdsort(benchmark::State &state, Args &&...args) } } - #define BENCH_BOTH_QSORT(type) \ BENCH_SORT(simdsort, type) \ - BENCH_SORT(scalarsort, type) \ + BENCH_SORT(scalarsort, type) BENCH_BOTH_QSORT(uint64_t) BENCH_BOTH_QSORT(int64_t) diff --git a/benchmarks/bench-vqsort.cpp b/benchmarks/bench-vqsort.cpp index ad580a0b..d0dea2b1 100644 --- a/benchmarks/bench-vqsort.cpp +++ b/benchmarks/bench-vqsort.cpp @@ -14,7 +14,8 @@ static void vqsort(benchmark::State &state, Args &&...args) std::vector arr_bkp = arr; // benchmark for (auto _ : state) { - hwy::HWY_NAMESPACE::VQSortStatic(arr.data(), arrsize, hwy::SortAscending()); + hwy::HWY_NAMESPACE::VQSortStatic( + arr.data(), arrsize, hwy::SortAscending()); state.PauseTiming(); arr = arr_bkp; state.ResumeTiming(); diff --git a/benchmarks/bench.h b/benchmarks/bench.h index 076e919d..fdeb2cec 100644 --- a/benchmarks/bench.h +++ b/benchmarks/bench.h @@ -12,14 +12,10 @@ }))) #define BENCH_SORT(func, type) \ - MY_BENCHMARK_CAPTURE( \ - func, type, random_128, 128, std::string("random")); \ - MY_BENCHMARK_CAPTURE( \ - func, type, random_256, 256, std::string("random")); \ - MY_BENCHMARK_CAPTURE( \ - func, type, random_512, 512, std::string("random")); \ - MY_BENCHMARK_CAPTURE( \ - func, type, random_1k, 1024, std::string("random")); \ + MY_BENCHMARK_CAPTURE(func, type, random_128, 128, std::string("random")); \ + MY_BENCHMARK_CAPTURE(func, type, random_256, 256, std::string("random")); \ + MY_BENCHMARK_CAPTURE(func, type, random_512, 512, std::string("random")); \ + MY_BENCHMARK_CAPTURE(func, type, random_1k, 1024, std::string("random")); \ MY_BENCHMARK_CAPTURE(func, type, random_5k, 5000, std::string("random")); \ MY_BENCHMARK_CAPTURE( \ func, type, random_100k, 100000, std::string("random")); \ @@ -37,7 +33,8 @@ func, type, smallrange_512, 512, std::string("smallrange")); \ MY_BENCHMARK_CAPTURE( \ func, type, smallrange_1k, 1024, std::string("smallrange")); \ - MY_BENCHMARK_CAPTURE(func, type, smallrange_5k, 5000, std::string("smallrange")); \ + MY_BENCHMARK_CAPTURE( \ + func, type, smallrange_5k, 5000, std::string("smallrange")); \ MY_BENCHMARK_CAPTURE( \ func, type, smallrange_100k, 100000, std::string("smallrange")); \ MY_BENCHMARK_CAPTURE( \ diff --git a/examples/avx2-32bit-qsort.cpp b/examples/avx2-32bit-qsort.cpp index eddedca8..5e36aa22 100644 --- a/examples/avx2-32bit-qsort.cpp +++ b/examples/avx2-32bit-qsort.cpp @@ -1,6 +1,7 @@ #include "avx2-32bit-qsort.hpp" -int main() { +int main() +{ const int size = 1000; float arr[size]; avx2_qsort(arr, size); diff --git a/examples/avx512-16bit-qsort.cpp b/examples/avx512-16bit-qsort.cpp index 70de029a..9990402b 100644 --- a/examples/avx512-16bit-qsort.cpp +++ b/examples/avx512-16bit-qsort.cpp @@ -1,6 +1,7 @@ #include "avx512-16bit-qsort.hpp" -int main() { +int main() +{ const int size = 1000; short arr[size]; avx512_qsort(arr, size); diff --git a/examples/avx512-32bit-qsort.cpp b/examples/avx512-32bit-qsort.cpp index f842a7ad..8d8b8b7a 100644 --- a/examples/avx512-32bit-qsort.cpp +++ b/examples/avx512-32bit-qsort.cpp @@ -1,6 +1,7 @@ #include "avx512-32bit-qsort.hpp" -int main() { +int main() +{ const int size = 1000; float arr[size]; avx512_qsort(arr, size); diff --git a/examples/avx512-64bit-qsort.cpp b/examples/avx512-64bit-qsort.cpp index 328d9a6f..400f860a 100644 --- a/examples/avx512-64bit-qsort.cpp +++ b/examples/avx512-64bit-qsort.cpp @@ -1,6 +1,7 @@ #include "avx512-64bit-qsort.hpp" -int main() { +int main() +{ const int size = 1000; double arr[size]; avx512_qsort(arr, size); diff --git a/examples/avx512-argsort.cpp b/examples/avx512-argsort.cpp index a706cc48..cbe21066 100644 --- a/examples/avx512-argsort.cpp +++ b/examples/avx512-argsort.cpp @@ -1,6 +1,7 @@ #include "avx512-64bit-argsort.hpp" -int main() { +int main() +{ const int size = 1000; float arr[size]; std::vector arg1 = avx512_argsort(arr, size); diff --git a/examples/avx512-kv.cpp b/examples/avx512-kv.cpp index c789b7c8..f46a1020 100644 --- a/examples/avx512-kv.cpp +++ b/examples/avx512-kv.cpp @@ -1,6 +1,7 @@ #include "avx512-64bit-keyvaluesort.hpp" -int main() { +int main() +{ const int size = 1000; int64_t arr1[size]; uint64_t arr2[size]; diff --git a/examples/avx512fp-16bit-qsort.cpp b/examples/avx512fp-16bit-qsort.cpp index f561b84e..18e1c823 100644 --- a/examples/avx512fp-16bit-qsort.cpp +++ b/examples/avx512fp-16bit-qsort.cpp @@ -1,6 +1,7 @@ #include "avx512fp16-16bit-qsort.hpp" -int main() { +int main() +{ const int size = 1000; _Float16 arr[size]; avx512_qsort(arr, size); diff --git a/lib/x86simdsort-avx2.cpp b/lib/x86simdsort-avx2.cpp index 5588cffa..7700c9f4 100644 --- a/lib/x86simdsort-avx2.cpp +++ b/lib/x86simdsort-avx2.cpp @@ -20,7 +20,7 @@ void partial_qsort(type *arr, size_t k, size_t arrsize, bool hasnan) \ { \ avx2_partial_qsort(arr, k, arrsize, hasnan); \ - }\ + } \ template <> \ std::vector argsort(type *arr, size_t arrsize, bool hasnan) \ { \ diff --git a/lib/x86simdsort-internal.h b/lib/x86simdsort-internal.h index 550227e4..70f13daf 100644 --- a/lib/x86simdsort-internal.h +++ b/lib/x86simdsort-internal.h @@ -11,7 +11,8 @@ namespace avx512 { XSS_HIDE_SYMBOL void qsort(T *arr, size_t arrsize, bool hasnan = false); // key-value quicksort template - XSS_EXPORT_SYMBOL void keyvalue_qsort(T1 *key, T2* val, size_t arrsize, bool hasnan = false); + XSS_EXPORT_SYMBOL void + keyvalue_qsort(T1 *key, T2 *val, size_t arrsize, bool hasnan = false); // quickselect template XSS_HIDE_SYMBOL void @@ -35,7 +36,8 @@ namespace avx2 { XSS_HIDE_SYMBOL void qsort(T *arr, size_t arrsize, bool hasnan = false); // key-value quicksort template - XSS_EXPORT_SYMBOL void keyvalue_qsort(T1 *key, T2* val, size_t arrsize, bool hasnan = false); + XSS_EXPORT_SYMBOL void + keyvalue_qsort(T1 *key, T2 *val, size_t arrsize, bool hasnan = false); // quickselect template XSS_HIDE_SYMBOL void @@ -59,7 +61,8 @@ namespace scalar { XSS_HIDE_SYMBOL void qsort(T *arr, size_t arrsize, bool hasnan = false); // key-value quicksort template - XSS_EXPORT_SYMBOL void keyvalue_qsort(T1 *key, T2* val, size_t arrsize, bool hasnan = false); + XSS_EXPORT_SYMBOL void + keyvalue_qsort(T1 *key, T2 *val, size_t arrsize, bool hasnan = false); // quickselect template XSS_HIDE_SYMBOL void diff --git a/lib/x86simdsort-scalar.h b/lib/x86simdsort-scalar.h index 81d7e226..a5348106 100644 --- a/lib/x86simdsort-scalar.h +++ b/lib/x86simdsort-scalar.h @@ -4,25 +4,24 @@ namespace xss { namespace utils { -/* O(1) permute array in place: stolen from + /* O(1) permute array in place: stolen from * http://www.davidespataro.it/apply-a-permutation-to-a-vector */ -template -void apply_permutation_in_place(T* arr, std::vector arg) -{ - for(size_t i = 0 ; i < arg.size() ; i++) { - size_t curr = i; - size_t next = arg[curr]; - while(next != i) - { - std::swap(arr[curr], arr[next]); + template + void apply_permutation_in_place(T *arr, std::vector arg) + { + for (size_t i = 0; i < arg.size(); i++) { + size_t curr = i; + size_t next = arg[curr]; + while (next != i) { + std::swap(arr[curr], arr[next]); + arg[curr] = curr; + curr = next; + next = arg[next]; + } arg[curr] = curr; - curr = next; - next = arg[next]; } - arg[curr] = curr; } -} -} // utils +} // namespace utils namespace scalar { template @@ -79,7 +78,7 @@ namespace scalar { return arg; } template - void keyvalue_qsort(T1 *key, T2* val, size_t arrsize, bool hasnan) + void keyvalue_qsort(T1 *key, T2 *val, size_t arrsize, bool hasnan) { std::vector arg = argsort(key, arrsize, hasnan); utils::apply_permutation_in_place(key, arg); diff --git a/lib/x86simdsort-skx.cpp b/lib/x86simdsort-skx.cpp index 02faa90f..11145e3a 100644 --- a/lib/x86simdsort-skx.cpp +++ b/lib/x86simdsort-skx.cpp @@ -35,36 +35,35 @@ #define DEFINE_KEYVALUE_METHODS(type) \ template <> \ - void keyvalue_qsort(type *key, uint64_t* val, size_t arrsize, bool hasnan) \ + void keyvalue_qsort(type *key, uint64_t *val, size_t arrsize, bool hasnan) \ { \ avx512_qsort_kv(key, val, arrsize, hasnan); \ } \ template <> \ - void keyvalue_qsort(type *key, int64_t* val, size_t arrsize, bool hasnan) \ + void keyvalue_qsort(type *key, int64_t *val, size_t arrsize, bool hasnan) \ { \ avx512_qsort_kv(key, val, arrsize, hasnan); \ } \ template <> \ - void keyvalue_qsort(type *key, double* val, size_t arrsize, bool hasnan) \ + void keyvalue_qsort(type *key, double *val, size_t arrsize, bool hasnan) \ { \ avx512_qsort_kv(key, val, arrsize, hasnan); \ } \ template <> \ - void keyvalue_qsort(type *key, uint32_t* val, size_t arrsize, bool hasnan) \ + void keyvalue_qsort(type *key, uint32_t *val, size_t arrsize, bool hasnan) \ { \ avx512_qsort_kv(key, val, arrsize, hasnan); \ } \ template <> \ - void keyvalue_qsort(type *key, int32_t* val, size_t arrsize, bool hasnan) \ + void keyvalue_qsort(type *key, int32_t *val, size_t arrsize, bool hasnan) \ { \ avx512_qsort_kv(key, val, arrsize, hasnan); \ } \ template <> \ - void keyvalue_qsort(type *key, float* val, size_t arrsize, bool hasnan) \ + void keyvalue_qsort(type *key, float *val, size_t arrsize, bool hasnan) \ { \ avx512_qsort_kv(key, val, arrsize, hasnan); \ - } \ - + } namespace xss { namespace avx512 { diff --git a/lib/x86simdsort.cpp b/lib/x86simdsort.cpp index f088e4cd..9f8fff84 100644 --- a/lib/x86simdsort.cpp +++ b/lib/x86simdsort.cpp @@ -122,30 +122,36 @@ namespace x86simdsort { return; \ } \ } \ - } \ + } #define DISPATCH_KEYVALUE_SORT(TYPE1, TYPE2, ISA) \ - static void (CAT(CAT(*internal_kv_qsort_, TYPE1), TYPE2))(TYPE1*, TYPE2*, size_t, bool) = NULL; \ + static void(CAT(CAT(*internal_kv_qsort_, TYPE1), TYPE2))( \ + TYPE1 *, TYPE2 *, size_t, bool) \ + = NULL; \ template <> \ - void keyvalue_qsort(TYPE1 *key, TYPE2* val, size_t arrsize, bool hasnan) \ + void keyvalue_qsort(TYPE1 *key, TYPE2 *val, size_t arrsize, bool hasnan) \ { \ - (CAT(CAT(*internal_kv_qsort_, TYPE1), TYPE2))(key, val, arrsize, hasnan); \ + (CAT(CAT(*internal_kv_qsort_, TYPE1), TYPE2))( \ + key, val, arrsize, hasnan); \ } \ - static __attribute__((constructor)) void \ - CAT(CAT(resolve_keyvalue_qsort_, TYPE1), TYPE2)(void) \ + static __attribute__((constructor)) void CAT( \ + CAT(resolve_keyvalue_qsort_, TYPE1), TYPE2)(void) \ { \ - CAT(CAT(internal_kv_qsort_, TYPE1), TYPE2) = &xss::scalar::keyvalue_qsort; \ + CAT(CAT(internal_kv_qsort_, TYPE1), TYPE2) \ + = &xss::scalar::keyvalue_qsort; \ __builtin_cpu_init(); \ std::string_view preferred_cpu = find_preferred_cpu(ISA); \ if constexpr (dispatch_requested("avx512", ISA)) { \ if (preferred_cpu.find("avx512") != std::string_view::npos) { \ - CAT(CAT(internal_kv_qsort_, TYPE1), TYPE2) = &xss::avx512::keyvalue_qsort; \ + CAT(CAT(internal_kv_qsort_, TYPE1), TYPE2) \ + = &xss::avx512::keyvalue_qsort; \ return; \ } \ } \ if constexpr (dispatch_requested("avx2", ISA)) { \ if (preferred_cpu.find("avx2") != std::string_view::npos) { \ - CAT(CAT(internal_kv_qsort_, TYPE1), TYPE2) = &xss::avx2::keyvalue_qsort; \ + CAT(CAT(internal_kv_qsort_, TYPE1), TYPE2) \ + = &xss::avx2::keyvalue_qsort; \ return; \ } \ } \ @@ -197,12 +203,12 @@ DISPATCH_ALL(argselect, (ISA_LIST("avx512_skx", "avx2"))) #define DISPATCH_KEYVALUE_SORT_FORTYPE(type) \ - DISPATCH_KEYVALUE_SORT(type, uint64_t, (ISA_LIST("avx512_skx")))\ - DISPATCH_KEYVALUE_SORT(type, int64_t, (ISA_LIST("avx512_skx")))\ - DISPATCH_KEYVALUE_SORT(type, double, (ISA_LIST("avx512_skx")))\ - DISPATCH_KEYVALUE_SORT(type, uint32_t, (ISA_LIST("avx512_skx")))\ - DISPATCH_KEYVALUE_SORT(type, int32_t, (ISA_LIST("avx512_skx")))\ - DISPATCH_KEYVALUE_SORT(type, float, (ISA_LIST("avx512_skx")))\ + DISPATCH_KEYVALUE_SORT(type, uint64_t, (ISA_LIST("avx512_skx"))) \ + DISPATCH_KEYVALUE_SORT(type, int64_t, (ISA_LIST("avx512_skx"))) \ + DISPATCH_KEYVALUE_SORT(type, double, (ISA_LIST("avx512_skx"))) \ + DISPATCH_KEYVALUE_SORT(type, uint32_t, (ISA_LIST("avx512_skx"))) \ + DISPATCH_KEYVALUE_SORT(type, int32_t, (ISA_LIST("avx512_skx"))) \ + DISPATCH_KEYVALUE_SORT(type, float, (ISA_LIST("avx512_skx"))) DISPATCH_KEYVALUE_SORT_FORTYPE(uint64_t) DISPATCH_KEYVALUE_SORT_FORTYPE(int64_t) diff --git a/lib/x86simdsort.h b/lib/x86simdsort.h index 5297f799..e7bd80f2 100644 --- a/lib/x86simdsort.h +++ b/lib/x86simdsort.h @@ -39,7 +39,7 @@ argselect(T *arr, size_t k, size_t arrsize, bool hasnan = false); // keyvalue sort template XSS_EXPORT_SYMBOL void -keyvalue_qsort(T1 *key, T2* val, size_t arrsize, bool hasnan = false); +keyvalue_qsort(T1 *key, T2 *val, size_t arrsize, bool hasnan = false); // sort an object template @@ -61,17 +61,12 @@ XSS_EXPORT_SYMBOL void object_qsort(T *arr, uint32_t arrsize, Func key_func) /* (3) Permute obj array in-place */ std::vector done(arrsize); - for (size_t i = 0; i < arrsize; ++i) - { - if (done[i]) - { - continue; - } + for (size_t i = 0; i < arrsize; ++i) { + if (done[i]) { continue; } done[i] = true; size_t prev_j = i; size_t j = arg[i]; - while (i != j) - { + while (i != j) { std::swap(arr[prev_j], arr[j]); done[j] = true; prev_j = j; diff --git a/src/avx512-16bit-qsort.hpp b/src/avx512-16bit-qsort.hpp index 32d7419c..d9f0bd5e 100644 --- a/src/avx512-16bit-qsort.hpp +++ b/src/avx512-16bit-qsort.hpp @@ -502,9 +502,10 @@ replace_nan_with_inf>(uint16_t *arr, arrsize_t arrsize) { arrsize_t nan_count = 0; __mmask16 loadmask = 0xFFFF; - for (arrsize_t ii = 0; ii < arrsize; ii = ii + zmm_vector::numlanes / 2) { + for (arrsize_t ii = 0; ii < arrsize; + ii = ii + zmm_vector::numlanes / 2) { if (arrsize - ii < 16) { - loadmask = (0x0001 << (arrsize-ii)) - 0x0001; + loadmask = (0x0001 << (arrsize - ii)) - 0x0001; } __m256i in_zmm = _mm256_maskz_loadu_epi16(loadmask, arr); __m512 in_zmm_asfloat = _mm512_cvtph_ps(in_zmm); diff --git a/src/xss-common-qsort.h b/src/xss-common-qsort.h index 7b89ba21..097efceb 100644 --- a/src/xss-common-qsort.h +++ b/src/xss-common-qsort.h @@ -87,7 +87,8 @@ X86_SIMD_SORT_INLINE bool array_has_nan(type_t *arr, arrsize_t size) else { in = vtype::loadu(arr + ii); } - auto nanmask = vtype::convert_mask_to_int(vtype::template fpclass<0x01 | 0x80>(in)); + auto nanmask = vtype::convert_mask_to_int( + vtype::template fpclass<0x01 | 0x80>(in)); if (nanmask != 0x00) { found_nan = true; break; diff --git a/src/xss-network-keyvaluesort.hpp b/src/xss-network-keyvaluesort.hpp index 1cbbc159..a20da171 100644 --- a/src/xss-network-keyvaluesort.hpp +++ b/src/xss-network-keyvaluesort.hpp @@ -441,9 +441,8 @@ bitonic_fullmerge_n_vec(typename keyType::reg_t *keys, } template -X86_SIMD_SORT_INLINE void argsort_n_vec(typename keyType::type_t *keys, - arrsize_t *indices, - int N) +X86_SIMD_SORT_INLINE void +argsort_n_vec(typename keyType::type_t *keys, arrsize_t *indices, int N) { using kreg_t = typename keyType::reg_t; using ireg_t = typename indexType::reg_t; @@ -586,9 +585,8 @@ X86_SIMD_SORT_INLINE void kvsort_n_vec(typename keyType::type_t *keys, } template -X86_SIMD_SORT_INLINE void argsort_n(typename keyType::type_t *keys, - arrsize_t *indices, - int N) +X86_SIMD_SORT_INLINE void +argsort_n(typename keyType::type_t *keys, arrsize_t *indices, int N) { static_assert(keyType::numlanes == indexType::numlanes, "invalid pairing of value/index types"); diff --git a/tests/test-qsort.cpp b/tests/test-qsort.cpp index abf871a3..cffcfb32 100644 --- a/tests/test-qsort.cpp +++ b/tests/test-qsort.cpp @@ -93,7 +93,8 @@ TYPED_TEST_P(simdsort, test_argselect) std::sort(sortedarr.begin(), sortedarr.end(), compare>()); - auto arg = x86simdsort::argselect(arr.data(), k, arr.size(), hasnan); + auto arg + = x86simdsort::argselect(arr.data(), k, arr.size(), hasnan); IS_ARG_PARTITIONED(arr, arg, sortedarr[k], k, type); arr.clear(); sortedarr.clear(); diff --git a/utils/custom-compare.h b/utils/custom-compare.h index d99f0491..ab8df85c 100644 --- a/utils/custom-compare.h +++ b/utils/custom-compare.h @@ -12,7 +12,7 @@ struct compare { { if constexpr (xss::fp::is_floating_point_v) { T inf = xss::fp::infinity(); - T one = (T) 1.0; + T one = (T)1.0; if (!xss::fp::isunordered(a, b)) { return op(a, b); } else if ((xss::fp::isnan(a)) && (!xss::fp::isnan(b))) { return b == inf ? op(inf, one) : op(inf, b); @@ -32,7 +32,7 @@ struct compare { template struct compare_arg { - compare_arg(const T* arr) + compare_arg(const T *arr) { this->arr = arr; } @@ -40,5 +40,5 @@ struct compare_arg { { return compare()(arr[a], arr[b]); } - const T* arr; + const T *arr; }; diff --git a/utils/custom-float.h b/utils/custom-float.h index 291912b9..5faaa9e8 100644 --- a/utils/custom-float.h +++ b/utils/custom-float.h @@ -2,8 +2,7 @@ #define UTILS_FLOAT #include namespace xss { -namespace fp -{ +namespace fp { template inline constexpr bool is_floating_point_v = std::is_floating_point_v; @@ -86,6 +85,6 @@ namespace fp } #endif -} // namespace float +} // namespace fp } // namespace xss #endif diff --git a/utils/rand_array.h b/utils/rand_array.h index b0119fbd..a9703551 100644 --- a/utils/rand_array.h +++ b/utils/rand_array.h @@ -13,14 +13,13 @@ #include "custom-float.h" template -static std::vector get_uniform_rand_array( - int64_t arrsize, - T max = xss::fp::max(), - T min = xss::fp::min()) +static std::vector get_uniform_rand_array(int64_t arrsize, + T max = xss::fp::max(), + T min = xss::fp::min()) { std::vector arr; std::random_device rd; - if constexpr(std::is_floating_point_v) { + if constexpr (std::is_floating_point_v) { std::mt19937 gen(rd()); #ifndef XSS_DO_NOT_SET_SEED gen.seed(42); @@ -31,15 +30,16 @@ static std::vector get_uniform_rand_array( } } #ifdef __FLT16_MAX__ - else if constexpr(std::is_same_v) { - (void)(max); (void)(min); + else if constexpr (std::is_same_v) { + (void)(max); + (void)(min); for (auto jj = 0; jj < arrsize; ++jj) { float temp = (float)rand() / (float)(RAND_MAX); arr.push_back((_Float16)temp); } } #endif - else if constexpr(std::is_integral_v) { + else if constexpr (std::is_integral_v) { std::default_random_engine e1(rd()); #ifndef XSS_DO_NOT_SET_SEED e1.seed(42); @@ -53,10 +53,8 @@ static std::vector get_uniform_rand_array( } template -static std::vector -get_uniform_rand_array_with_uniquevalues(int64_t arrsize, - T max = xss::fp::max(), - T min = xss::fp::min()) +static std::vector get_uniform_rand_array_with_uniquevalues( + int64_t arrsize, T max = xss::fp::max(), T min = xss::fp::min()) { std::vector arr = get_uniform_rand_array(arrsize, max, min); typename std::vector::iterator ip @@ -66,14 +64,15 @@ get_uniform_rand_array_with_uniquevalues(int64_t arrsize, } template -static std::vector -get_array(std::string arrtype, - size_t arrsize, - T min = xss::fp::min(), - T max = xss::fp::max()) +static std::vector get_array(std::string arrtype, + size_t arrsize, + T min = xss::fp::min(), + T max = xss::fp::max()) { std::vector arr; - if (arrtype == "random") { arr = get_uniform_rand_array(arrsize, max, min); } + if (arrtype == "random") { + arr = get_uniform_rand_array(arrsize, max, min); + } else if (arrtype == "sorted") { arr = get_uniform_rand_array(arrsize, max, min); std::sort(arr.begin(), arr.end()); @@ -93,14 +92,12 @@ get_array(std::string arrtype, arr = get_uniform_rand_array(arrsize, 20, 1); } else if (arrtype == "random_5d") { - size_t temp = std::max((size_t) 1, (size_t) (0.5 * arrsize)); + size_t temp = std::max((size_t)1, (size_t)(0.5 * arrsize)); std::vector temparr = get_uniform_rand_array(temp); for (size_t ii = 0; ii < arrsize; ++ii) { - if (ii < temp) { - arr.push_back(temparr[ii]); - } + if (ii < temp) { arr.push_back(temparr[ii]); } else { - arr.push_back((T) 0); + arr.push_back((T)0); } } std::shuffle(arr.begin(), arr.end(), std::default_random_engine(42)); @@ -118,7 +115,7 @@ get_array(std::string arrtype, arr = get_uniform_rand_array(arrsize, max, min); int64_t num_nans = 10 % arrsize; std::vector rand_indx - = get_uniform_rand_array(num_nans, arrsize-1, 0); + = get_uniform_rand_array(num_nans, arrsize - 1, 0); T val; if constexpr (xss::fp::is_floating_point_v) { val = xss::fp::quiet_NaN(); @@ -140,13 +137,12 @@ get_array(std::string arrtype, val = std::numeric_limits::max(); } for (size_t ii = 1; ii <= arrsize; ++ii) { - if (rand() % 0x1) { - arr[ii] = val; - } + if (rand() % 0x1) { arr[ii] = val; } } } else { - std::cout << "Warning: unrecognized array type " << arrtype << std::endl; + std::cout << "Warning: unrecognized array type " << arrtype + << std::endl; } return arr; }