// jlavx512qsort.cpp
// C-linkage entry points that forward to the x86-simd-sort AVX-512 routines.
#include "x86-simd-sort/src/avx512-64bit-qsort.hpp"
#include "x86-simd-sort/src/avx512-32bit-qsort.hpp"
#include "x86-simd-sort/src/avx512-16bit-qsort.hpp"
/// C-linkage entry point: forwards `arr` and `arrsize` unchanged to
/// `avx512_qsort<double>`.
/// Presumably sorts the `arrsize` elements of `arr` in place -- confirm
/// against the x86-simd-sort documentation.
extern "C" void inplace_avx512_qsort_double(double *arr, int64_t arrsize)
{
    avx512_qsort<double>(arr, arrsize);
}
/// C-linkage entry point: forwards `arr` and `arrsize` unchanged to
/// `avx512_qsort<float>`.
/// Presumably sorts the `arrsize` elements of `arr` in place -- confirm
/// against the x86-simd-sort documentation.
extern "C" void inplace_avx512_qsort_single(float *arr, int64_t arrsize)
{
    avx512_qsort<float>(arr, arrsize);
}
/// C-linkage entry point: forwards `arr` and `arrsize` unchanged to
/// `avx512_qsort_fp16`.
/// The buffer is typed `uint16_t`; presumably each element holds the raw
/// bit pattern of a half-precision float -- confirm with the library.
extern "C" void inplace_avx512_qsort_half(uint16_t *arr, int64_t arrsize)
{
    avx512_qsort_fp16(arr, arrsize);
}
/// C-linkage entry point: forwards `arr` and `arrsize` unchanged to
/// `avx512_qsort<int64_t>`.
/// Presumably sorts the `arrsize` elements of `arr` in place -- confirm
/// against the x86-simd-sort documentation.
extern "C" void inplace_avx512_qsort_int64(int64_t *arr, int64_t arrsize)
{
    avx512_qsort<int64_t>(arr, arrsize);
}
/// C-linkage entry point: forwards `arr` and `arrsize` unchanged to
/// `avx512_qsort<int32_t>`.
/// Presumably sorts the `arrsize` elements of `arr` in place -- confirm
/// against the x86-simd-sort documentation.
extern "C" void inplace_avx512_qsort_int32(int32_t *arr, int64_t arrsize)
{
    avx512_qsort<int32_t>(arr, arrsize);
}
/// C-linkage entry point: forwards `arr` and `arrsize` unchanged to
/// `avx512_qsort<int16_t>`.
/// Presumably sorts the `arrsize` elements of `arr` in place -- confirm
/// against the x86-simd-sort documentation.
extern "C" void inplace_avx512_qsort_int16(int16_t *arr, int64_t arrsize)
{
    avx512_qsort<int16_t>(arr, arrsize);
}
/// C-linkage entry point: forwards `arr` and `arrsize` unchanged to
/// `avx512_qsort<uint64_t>`.
/// Presumably sorts the `arrsize` elements of `arr` in place -- confirm
/// against the x86-simd-sort documentation.
extern "C" void inplace_avx512_qsort_uint64(uint64_t *arr, int64_t arrsize)
{
    avx512_qsort<uint64_t>(arr, arrsize);
}
/// C-linkage entry point: forwards `arr` and `arrsize` unchanged to
/// `avx512_qsort<uint32_t>`.
/// Presumably sorts the `arrsize` elements of `arr` in place -- confirm
/// against the x86-simd-sort documentation.
extern "C" void inplace_avx512_qsort_uint32(uint32_t *arr, int64_t arrsize)
{
    avx512_qsort<uint32_t>(arr, arrsize);
}
/// C-linkage entry point: forwards `arr` and `arrsize` unchanged to
/// `avx512_qsort<uint16_t>`.
/// Presumably sorts the `arrsize` elements of `arr` in place -- confirm
/// against the x86-simd-sort documentation.
extern "C" void inplace_avx512_qsort_uint16(uint16_t *arr, int64_t arrsize)
{
    avx512_qsort<uint16_t>(arr, arrsize);
}
/// C-linkage entry point: calls `avx512_qselect<double>` on `arr`
/// (length `arrsize`) with selection index `k - 1`.
/// NOTE(review): the `- 1` suggests callers pass a 1-based `k` while the
/// library expects a 0-based index -- confirm against the caller.
extern "C" void inplace_avx512_partialqsort_double(int64_t k, double *arr, int64_t arrsize)
{
    const int64_t pos = k - 1;
    avx512_qselect<double>(arr, pos, arrsize);
}
/// C-linkage entry point: calls `avx512_qselect<float>` on `arr`
/// (length `arrsize`) with selection index `k - 1`.
/// NOTE(review): the `- 1` suggests callers pass a 1-based `k` while the
/// library expects a 0-based index -- confirm against the caller.
extern "C" void inplace_avx512_partialqsort_single(int64_t k, float *arr, int64_t arrsize)
{
    const int64_t pos = k - 1;
    avx512_qselect<float>(arr, pos, arrsize);
}
/// C-linkage entry point: calls `avx512_qselect_fp16` on `arr`
/// (length `arrsize`) with selection index `k - 1`.
/// The buffer is typed `uint16_t`; presumably raw half-precision bit
/// patterns -- confirm with the library.
/// NOTE(review): the `- 1` suggests callers pass a 1-based `k` while the
/// library expects a 0-based index -- confirm against the caller.
extern "C" void inplace_avx512_partialqsort_half(int64_t k, uint16_t *arr, int64_t arrsize)
{
    const int64_t pos = k - 1;
    avx512_qselect_fp16(arr, pos, arrsize);
}
/// C-linkage entry point: calls `avx512_qselect<int64_t>` on `arr`
/// (length `arrsize`) with selection index `k - 1`.
/// NOTE(review): the `- 1` suggests callers pass a 1-based `k` while the
/// library expects a 0-based index -- confirm against the caller.
extern "C" void inplace_avx512_partialqsort_int64(int64_t k, int64_t *arr, int64_t arrsize)
{
    const int64_t pos = k - 1;
    avx512_qselect<int64_t>(arr, pos, arrsize);
}
/// C-linkage entry point: calls `avx512_qselect<int32_t>` on `arr`
/// (length `arrsize`) with selection index `k - 1`.
/// NOTE(review): the `- 1` suggests callers pass a 1-based `k` while the
/// library expects a 0-based index -- confirm against the caller.
extern "C" void inplace_avx512_partialqsort_int32(int64_t k, int32_t *arr, int64_t arrsize)
{
    const int64_t pos = k - 1;
    avx512_qselect<int32_t>(arr, pos, arrsize);
}
/// C-linkage entry point: calls `avx512_qselect<int16_t>` on `arr`
/// (length `arrsize`) with selection index `k - 1`.
/// NOTE(review): the `- 1` suggests callers pass a 1-based `k` while the
/// library expects a 0-based index -- confirm against the caller.
extern "C" void inplace_avx512_partialqsort_int16(int64_t k, int16_t *arr, int64_t arrsize)
{
    const int64_t pos = k - 1;
    avx512_qselect<int16_t>(arr, pos, arrsize);
}
/// C-linkage entry point: calls `avx512_qselect<uint64_t>` on `arr`
/// (length `arrsize`) with selection index `k - 1`.
/// NOTE(review): the `- 1` suggests callers pass a 1-based `k` while the
/// library expects a 0-based index -- confirm against the caller.
extern "C" void inplace_avx512_partialqsort_uint64(int64_t k, uint64_t *arr, int64_t arrsize)
{
    const int64_t pos = k - 1;
    avx512_qselect<uint64_t>(arr, pos, arrsize);
}
/// C-linkage entry point: calls `avx512_qselect<uint32_t>` on `arr`
/// (length `arrsize`) with selection index `k - 1`.
/// NOTE(review): the `- 1` suggests callers pass a 1-based `k` while the
/// library expects a 0-based index -- confirm against the caller.
extern "C" void inplace_avx512_partialqsort_uint32(int64_t k, uint32_t *arr, int64_t arrsize)
{
    const int64_t pos = k - 1;
    avx512_qselect<uint32_t>(arr, pos, arrsize);
}
/// C-linkage entry point: calls `avx512_qselect<uint16_t>` on `arr`
/// (length `arrsize`) with selection index `k - 1`.
/// NOTE(review): the `- 1` suggests callers pass a 1-based `k` while the
/// library expects a 0-based index -- confirm against the caller.
extern "C" void inplace_avx512_partialqsort_uint16(int64_t k, uint16_t *arr, int64_t arrsize)
{
    const int64_t pos = k - 1;
    avx512_qselect<uint16_t>(arr, pos, arrsize);
}
/// C-linkage entry point for a range-limited partial sort of `double` data.
///
/// - `kfrom <= 1`: a single `avx512_partial_qsort<double>(arr, kto, arrsize)`.
/// - otherwise: `avx512_qselect<double>` with index `kfrom - 1` over the whole
///   array, followed by `avx512_partial_qsort<double>` on the sub-array that
///   begins at `arr + kfrom`, with `kto - kfrom` as `k` and `arrsize - kfrom`
///   as the length.
///
/// NOTE(review): `kfrom`/`kto` look like 1-based inclusive bounds; no range
/// validation is done here -- confirm that callers check them.
extern "C" void inplace_avx512_partialrangeqsort_double(int64_t kfrom, int64_t kto, double *arr, int64_t arrsize)
{
    // Range starts at the front: one partial sort covers it.
    if (kfrom <= 1) {
        avx512_partial_qsort<double>(arr, kto, arrsize);
        return;
    }

    avx512_qselect<double>(arr, kfrom - 1, arrsize);

    double *const tail = arr + kfrom;
    avx512_partial_qsort<double>(tail, kto - kfrom, arrsize - kfrom);
}
/// C-linkage entry point for a range-limited partial sort of `float` data.
///
/// - `kfrom <= 1`: a single `avx512_partial_qsort<float>(arr, kto, arrsize)`.
/// - otherwise: `avx512_qselect<float>` with index `kfrom - 1` over the whole
///   array, followed by `avx512_partial_qsort<float>` on the sub-array that
///   begins at `arr + kfrom`, with `kto - kfrom` as `k` and `arrsize - kfrom`
///   as the length.
///
/// NOTE(review): `kfrom`/`kto` look like 1-based inclusive bounds; no range
/// validation is done here -- confirm that callers check them.
extern "C" void inplace_avx512_partialrangeqsort_single(int64_t kfrom, int64_t kto, float *arr, int64_t arrsize)
{
    // Range starts at the front: one partial sort covers it.
    if (kfrom <= 1) {
        avx512_partial_qsort<float>(arr, kto, arrsize);
        return;
    }

    avx512_qselect<float>(arr, kfrom - 1, arrsize);

    float *const tail = arr + kfrom;
    avx512_partial_qsort<float>(tail, kto - kfrom, arrsize - kfrom);
}
/// C-linkage entry point for a range-limited partial sort through the
/// `_fp16` library routines.
///
/// - `kfrom <= 1`: a single `avx512_partial_qsort_fp16(arr, kto, arrsize)`.
/// - otherwise: `avx512_qselect_fp16` with index `kfrom - 1` over the whole
///   array, followed by `avx512_partial_qsort_fp16` on the sub-array that
///   begins at `arr + kfrom`, with `kto - kfrom` as `k` and `arrsize - kfrom`
///   as the length.
///
/// The buffer is typed `uint16_t`; presumably raw half-precision bit
/// patterns -- confirm with the library.
/// NOTE(review): `kfrom`/`kto` look like 1-based inclusive bounds; no range
/// validation is done here -- confirm that callers check them.
extern "C" void inplace_avx512_partialrangeqsort_half(int64_t kfrom, int64_t kto, uint16_t *arr, int64_t arrsize)
{
    // Range starts at the front: one partial sort covers it.
    if (kfrom <= 1) {
        avx512_partial_qsort_fp16(arr, kto, arrsize);
        return;
    }

    avx512_qselect_fp16(arr, kfrom - 1, arrsize);

    uint16_t *const tail = arr + kfrom;
    avx512_partial_qsort_fp16(tail, kto - kfrom, arrsize - kfrom);
}
/// C-linkage entry point for a range-limited partial sort of `int64_t` data.
///
/// - `kfrom <= 1`: a single `avx512_partial_qsort<int64_t>(arr, kto, arrsize)`.
/// - otherwise: `avx512_qselect<int64_t>` with index `kfrom - 1` over the
///   whole array, followed by `avx512_partial_qsort<int64_t>` on the sub-array
///   that begins at `arr + kfrom`, with `kto - kfrom` as `k` and
///   `arrsize - kfrom` as the length.
///
/// NOTE(review): `kfrom`/`kto` look like 1-based inclusive bounds; no range
/// validation is done here -- confirm that callers check them.
extern "C" void inplace_avx512_partialrangeqsort_int64(int64_t kfrom, int64_t kto, int64_t *arr, int64_t arrsize)
{
    // Range starts at the front: one partial sort covers it.
    if (kfrom <= 1) {
        avx512_partial_qsort<int64_t>(arr, kto, arrsize);
        return;
    }

    avx512_qselect<int64_t>(arr, kfrom - 1, arrsize);

    int64_t *const tail = arr + kfrom;
    avx512_partial_qsort<int64_t>(tail, kto - kfrom, arrsize - kfrom);
}
/// C-linkage entry point for a range-limited partial sort of `int32_t` data.
///
/// - `kfrom <= 1`: a single `avx512_partial_qsort<int32_t>(arr, kto, arrsize)`.
/// - otherwise: `avx512_qselect<int32_t>` with index `kfrom - 1` over the
///   whole array, followed by `avx512_partial_qsort<int32_t>` on the sub-array
///   that begins at `arr + kfrom`, with `kto - kfrom` as `k` and
///   `arrsize - kfrom` as the length.
///
/// NOTE(review): `kfrom`/`kto` look like 1-based inclusive bounds; no range
/// validation is done here -- confirm that callers check them.
extern "C" void inplace_avx512_partialrangeqsort_int32(int64_t kfrom, int64_t kto, int32_t *arr, int64_t arrsize)
{
    // Range starts at the front: one partial sort covers it.
    if (kfrom <= 1) {
        avx512_partial_qsort<int32_t>(arr, kto, arrsize);
        return;
    }

    avx512_qselect<int32_t>(arr, kfrom - 1, arrsize);

    int32_t *const tail = arr + kfrom;
    avx512_partial_qsort<int32_t>(tail, kto - kfrom, arrsize - kfrom);
}
/// C-linkage entry point for a range-limited partial sort of `int16_t` data.
///
/// - `kfrom <= 1`: a single `avx512_partial_qsort<int16_t>(arr, kto, arrsize)`.
/// - otherwise: `avx512_qselect<int16_t>` with index `kfrom - 1` over the
///   whole array, followed by `avx512_partial_qsort<int16_t>` on the sub-array
///   that begins at `arr + kfrom`, with `kto - kfrom` as `k` and
///   `arrsize - kfrom` as the length.
///
/// NOTE(review): `kfrom`/`kto` look like 1-based inclusive bounds; no range
/// validation is done here -- confirm that callers check them.
extern "C" void inplace_avx512_partialrangeqsort_int16(int64_t kfrom, int64_t kto, int16_t *arr, int64_t arrsize)
{
    // Range starts at the front: one partial sort covers it.
    if (kfrom <= 1) {
        avx512_partial_qsort<int16_t>(arr, kto, arrsize);
        return;
    }

    avx512_qselect<int16_t>(arr, kfrom - 1, arrsize);

    int16_t *const tail = arr + kfrom;
    avx512_partial_qsort<int16_t>(tail, kto - kfrom, arrsize - kfrom);
}
/// C-linkage entry point for a range-limited partial sort of `uint64_t` data.
///
/// - `kfrom <= 1`: a single `avx512_partial_qsort<uint64_t>(arr, kto, arrsize)`.
/// - otherwise: `avx512_qselect<uint64_t>` with index `kfrom - 1` over the
///   whole array, followed by `avx512_partial_qsort<uint64_t>` on the
///   sub-array that begins at `arr + kfrom`, with `kto - kfrom` as `k` and
///   `arrsize - kfrom` as the length.
///
/// NOTE(review): `kfrom`/`kto` look like 1-based inclusive bounds; no range
/// validation is done here -- confirm that callers check them.
extern "C" void inplace_avx512_partialrangeqsort_uint64(int64_t kfrom, int64_t kto, uint64_t *arr, int64_t arrsize)
{
    // Range starts at the front: one partial sort covers it.
    if (kfrom <= 1) {
        avx512_partial_qsort<uint64_t>(arr, kto, arrsize);
        return;
    }

    avx512_qselect<uint64_t>(arr, kfrom - 1, arrsize);

    uint64_t *const tail = arr + kfrom;
    avx512_partial_qsort<uint64_t>(tail, kto - kfrom, arrsize - kfrom);
}
/// C-linkage entry point for a range-limited partial sort of `uint32_t` data.
///
/// - `kfrom <= 1`: a single `avx512_partial_qsort<uint32_t>(arr, kto, arrsize)`.
/// - otherwise: `avx512_qselect<uint32_t>` with index `kfrom - 1` over the
///   whole array, followed by `avx512_partial_qsort<uint32_t>` on the
///   sub-array that begins at `arr + kfrom`, with `kto - kfrom` as `k` and
///   `arrsize - kfrom` as the length.
///
/// NOTE(review): `kfrom`/`kto` look like 1-based inclusive bounds; no range
/// validation is done here -- confirm that callers check them.
extern "C" void inplace_avx512_partialrangeqsort_uint32(int64_t kfrom, int64_t kto, uint32_t *arr, int64_t arrsize)
{
    // Range starts at the front: one partial sort covers it.
    if (kfrom <= 1) {
        avx512_partial_qsort<uint32_t>(arr, kto, arrsize);
        return;
    }

    avx512_qselect<uint32_t>(arr, kfrom - 1, arrsize);

    uint32_t *const tail = arr + kfrom;
    avx512_partial_qsort<uint32_t>(tail, kto - kfrom, arrsize - kfrom);
}
/// C-linkage entry point for a range-limited partial sort of `uint16_t` data.
///
/// - `kfrom <= 1`: a single `avx512_partial_qsort<uint16_t>(arr, kto, arrsize)`.
/// - otherwise: `avx512_qselect<uint16_t>` with index `kfrom - 1` over the
///   whole array, followed by `avx512_partial_qsort<uint16_t>` on the
///   sub-array that begins at `arr + kfrom`, with `kto - kfrom` as `k` and
///   `arrsize - kfrom` as the length.
///
/// NOTE(review): `kfrom`/`kto` look like 1-based inclusive bounds; no range
/// validation is done here -- confirm that callers check them.
extern "C" void inplace_avx512_partialrangeqsort_uint16(int64_t kfrom, int64_t kto, uint16_t *arr, int64_t arrsize)
{
    // Range starts at the front: one partial sort covers it.
    if (kfrom <= 1) {
        avx512_partial_qsort<uint16_t>(arr, kto, arrsize);
        return;
    }

    avx512_qselect<uint16_t>(arr, kfrom - 1, arrsize);

    uint16_t *const tail = arr + kfrom;
    avx512_partial_qsort<uint16_t>(tail, kto - kfrom, arrsize - kfrom);
}