Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds support for AVX2 for 32-bit types for quicksort and quickselect #60

Merged
merged 19 commits into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion _clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
PointerAlignment: Right
ReflowComments: false
SortIncludes: true
SortIncludes: false
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: true
Expand Down
5 changes: 4 additions & 1 deletion examples/Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
CXX ?= g++-12
CFLAGS = -I../src -std=c++17 -O3
EXE = argsort kvsort qsortfp16 qsort16 qsort32 qsort64
EXE = qsort32avx2 argsort kvsort qsortfp16 qsort16 qsort32 qsort64

default: all
all : $(EXE)
Expand All @@ -14,6 +14,9 @@ qsort16: avx512-16bit-qsort.cpp
qsort32: avx512-32bit-qsort.cpp
$(CXX) -o qsort32 -march=skylake-avx512 $(CFLAGS) avx512-32bit-qsort.cpp

qsort32avx2: avx2-32bit-qsort.cpp
$(CXX) -o qsort32avx2 -march=haswell $(CFLAGS) avx2-32bit-qsort.cpp

qsort64: avx512-64bit-qsort.cpp
$(CXX) -o qsort64 -march=skylake-avx512 $(CFLAGS) avx512-64bit-qsort.cpp

Expand Down
10 changes: 10 additions & 0 deletions examples/avx2-32bit-qsort.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#include "avx2-32bit-qsort.hpp"

// Minimal usage example: runs the three AVX2 32-bit entry points
// (full sort, select, partial sort) on a small float array.
int main()
{
    constexpr int size = 1000;
    float arr[size];
    // Give every element a defined value (descending order) before sorting.
    // The array was previously left uninitialized, so the routines below
    // would have read indeterminate values, which is undefined behavior.
    for (int i = 0; i < size; ++i) {
        arr[i] = static_cast<float>(size - i);
    }
    avx2_qsort(arr, size);
    avx2_qselect(arr, 10, size);
    avx2_partial_qsort(arr, 10, size);
    return 0;
}
10 changes: 10 additions & 0 deletions lib/meson.build
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
libtargets = []

if cpp.has_argument('-march=haswell')
libtargets += static_library('libavx',
files(
'x86simdsort-avx2.cpp',
),
include_directories : [src],
cpp_args : ['-march=haswell', flags_hide_symbols],
)
endif

if cpp.has_argument('-march=skylake-avx512')
libtargets += static_library('libskx',
files(
Expand Down
28 changes: 28 additions & 0 deletions lib/x86simdsort-avx2.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// AVX2 specific routines:
#include "avx2-32bit-qsort.hpp"
#include "x86simdsort-internal.h"

/* Emits explicit specializations of qsort, qselect and partial_qsort for the
 * element type T. Each specialization forwards its arguments unchanged to the
 * matching avx2_* routine. */
#define DEFINE_ALL_METHODS(T) \
    template <> \
    void qsort(T *arr, size_t arrsize) \
    { \
        avx2_qsort(arr, arrsize); \
    } \
    template <> \
    void qselect(T *arr, size_t k, size_t arrsize, bool hasnan) \
    { \
        avx2_qselect(arr, k, arrsize, hasnan); \
    } \
    template <> \
    void partial_qsort(T *arr, size_t k, size_t arrsize, bool hasnan) \
    { \
        avx2_partial_qsort(arr, k, arrsize, hasnan); \
    }

// Instantiate the AVX2 qsort/qselect/partial_qsort specializations for each
// 32-bit element type listed below.
namespace xss {
namespace avx2 {
DEFINE_ALL_METHODS(uint32_t)
DEFINE_ALL_METHODS(int32_t)
DEFINE_ALL_METHODS(float)
} // namespace avx2
} // namespace xss
51 changes: 36 additions & 15 deletions lib/x86simdsort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

static int check_cpu_feature_support(std::string_view cpufeature)
{
const char* disable_avx512 = std::getenv("XSS_DISABLE_AVX512");
const char *disable_avx512 = std::getenv("XSS_DISABLE_AVX512");

if ((cpufeature == "avx512_spr") && (!disable_avx512))
#ifdef __FLT16_MAX__
Expand Down Expand Up @@ -100,34 +100,40 @@ dispatch_requested(std::string_view cpurequested,
}

/* runtime dispatch mechanism */
#define DISPATCH(func, TYPE, ...) \
#define DISPATCH(func, TYPE, ISA) \
DECLARE_INTERNAL_##func(TYPE) static __attribute__((constructor)) void \
CAT(CAT(resolve_, func), TYPE)(void) \
{ \
CAT(CAT(internal_, func), TYPE) = &xss::scalar::func<TYPE>; \
__builtin_cpu_init(); \
std::string_view preferred_cpu = find_preferred_cpu({__VA_ARGS__}); \
if constexpr (dispatch_requested("avx512", {__VA_ARGS__})) { \
std::string_view preferred_cpu = find_preferred_cpu(ISA); \
if constexpr (dispatch_requested("avx512", ISA)) { \
if (preferred_cpu.find("avx512") != std::string_view::npos) { \
CAT(CAT(internal_, func), TYPE) = &xss::avx512::func<TYPE>; \
return; \
} \
} \
else if constexpr (dispatch_requested("avx2", {__VA_ARGS__})) { \
if constexpr (dispatch_requested("avx2", ISA)) { \
if (preferred_cpu.find("avx2") != std::string_view::npos) { \
CAT(CAT(internal_, func), TYPE) = &xss::avx2::func<TYPE>; \
return; \
} \
} \
}

/* Wraps a comma-separated list of ISA name literals in a braced
 * std::initializer_list<std::string_view> expression, so the whole list can
 * be passed around as a single macro argument. */
#define ISA_LIST(...) std::initializer_list<std::string_view> {__VA_ARGS__}

namespace x86simdsort {
#ifdef __FLT16_MAX__
DISPATCH(qsort, _Float16, "avx512_spr")
DISPATCH(qselect, _Float16, "avx512_spr")
DISPATCH(partial_qsort, _Float16, "avx512_spr")
DISPATCH(argsort, _Float16, "none")
DISPATCH(argselect, _Float16, "none")
DISPATCH(qsort, _Float16, ISA_LIST("avx512_spr"))
DISPATCH(qselect, _Float16, ISA_LIST("avx512_spr"))
DISPATCH(partial_qsort, _Float16, ISA_LIST("avx512_spr"))
DISPATCH(argsort, _Float16, ISA_LIST("none"))
DISPATCH(argselect, _Float16, ISA_LIST("none"))
#endif

#define DISPATCH_ALL(func, ISA_16BIT, ISA_32BIT, ISA_64BIT) \
Expand All @@ -140,10 +146,25 @@ DISPATCH(argselect, _Float16, "none")
DISPATCH(func, uint64_t, ISA_64BIT) \
DISPATCH(func, double, ISA_64BIT)

DISPATCH_ALL(qsort, ("avx512_icl"), ("avx512_skx"), ("avx512_skx"))
DISPATCH_ALL(qselect, ("avx512_icl"), ("avx512_skx"), ("avx512_skx"))
DISPATCH_ALL(partial_qsort, ("avx512_icl"), ("avx512_skx"), ("avx512_skx"))
DISPATCH_ALL(argsort, "none", "avx512_skx", "avx512_skx")
DISPATCH_ALL(argselect, "none", "avx512_skx", "avx512_skx")
DISPATCH_ALL(qsort,
(ISA_LIST("avx512_icl")),
(ISA_LIST("avx512_skx", "avx2")),
(ISA_LIST("avx512_skx")))
DISPATCH_ALL(qselect,
(ISA_LIST("avx512_icl")),
(ISA_LIST("avx512_skx", "avx2")),
(ISA_LIST("avx512_skx")))
DISPATCH_ALL(partial_qsort,
(ISA_LIST("avx512_icl")),
(ISA_LIST("avx512_skx", "avx2")),
(ISA_LIST("avx512_skx")))
DISPATCH_ALL(argsort,
(ISA_LIST("none")),
(ISA_LIST("avx512_skx")),
(ISA_LIST("avx512_skx")))
DISPATCH_ALL(argselect,
(ISA_LIST("none")),
(ISA_LIST("avx512_skx")),
(ISA_LIST("avx512_skx")))

} // namespace x86simdsort
Loading