Skip to content

Commit

Permalink
Add support for smallint to PrefixSort (facebookincubator#10946)
Browse files Browse the repository at this point in the history
Summary:
According to the benchmark, for data sets of size 0.5k and larger, PrefixSort outperforms std::sort, with performance improvements ranging from approximately 214% to over 500%. Here is a summary of the benchmark results:

| Dataset Size | PrefixSort Improvement (No Payload) | PrefixSort Improvement (With Payload) |
|--------------|-------------------------------------|-------------------------------------|
| 0.5k         | 248.97% - 287.43%                   | 249.71% - 289.74%                   |
| 1k           | 214.44% - 310.92%                   | 215.03% - 315.14%                   |
| 10k          | 216.21% - 255.38%                   | 217.88% - 256.88%                   |
| 100k         | 279.81% - 318.26%                   | 284.89% - 295.21%                   |
| 1000k        | 304.36% - 351.31%                   | 454.04% - 514.28%                   |

Follow-up to facebookincubator#8350
Part of facebookincubator#6766

Pull Request resolved: facebookincubator#10946

Reviewed By: Yuhta

Differential Revision: D62373593

Pulled By: mbasmanova

fbshipit-source-id: b8594e05cc6aee736d09db1695db770b84e9d4bd
  • Loading branch information
kevincmchen authored and facebook-github-bot committed Sep 12, 2024
1 parent 741876f commit 98bbb73
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 2 deletions.
4 changes: 4 additions & 0 deletions velox/exec/PrefixSort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ FOLLY_ALWAYS_INLINE void extractRowColumnToPrefix(
char* const row,
char* const prefix) {
switch (typeKind) {
case TypeKind::SMALLINT: {
encodeRowColumn<int16_t>(prefixSortLayout, index, rowColumn, row, prefix);
return;
}
case TypeKind::INTEGER: {
encodeRowColumn<int32_t>(prefixSortLayout, index, rowColumn, row, prefix);
return;
Expand Down
73 changes: 72 additions & 1 deletion velox/exec/benchmarks/PrefixSortBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,30 @@ class PrefixSortBenchmark {
}
}

/// Builds the four row types used by the SMALLINT benchmarks. The i-th row
/// type (i = 1..4) starts with i SMALLINT columns.
///
/// @param noPayload When true, each row type contains only the SMALLINT
/// columns. When false, each row type additionally ends with two VARCHAR
/// columns.
/// @return Four row types, ordered by increasing number of SMALLINT columns.
std::vector<RowTypePtr> smallintRowTypes(bool noPayload) {
  if (!noPayload) {
    // SMALLINT columns followed by two trailing VARCHAR columns.
    return {
        ROW({SMALLINT(), VARCHAR(), VARCHAR()}),
        ROW({SMALLINT(), SMALLINT(), VARCHAR(), VARCHAR()}),
        ROW({SMALLINT(), SMALLINT(), SMALLINT(), VARCHAR(), VARCHAR()}),
        ROW({SMALLINT(),
             SMALLINT(),
             SMALLINT(),
             SMALLINT(),
             VARCHAR(),
             VARCHAR()}),
    };
  }

  // SMALLINT columns only.
  return {
      ROW({SMALLINT()}),
      ROW({SMALLINT(), SMALLINT()}),
      ROW({SMALLINT(), SMALLINT(), SMALLINT()}),
      ROW({SMALLINT(), SMALLINT(), SMALLINT(), SMALLINT()}),
  };
}

void bigint(
bool noPayload,
int numIterations,
Expand Down Expand Up @@ -296,6 +320,49 @@ class PrefixSortBenchmark {
"no-payloads", "varchar", batchSizes, rowTypes, numKeys, iterations);
}

/// Registers the SMALLINT benchmark cases by forwarding to benchmark().
///
/// @param noPayload Selects the row types without (true) or with (false) the
/// trailing VARCHAR columns, and the label "no-payload" or "payload".
/// @param numIterations Iteration count forwarded to benchmark().
/// @param batchSizes Batch sizes forwarded to benchmark().
void smallint(
    bool noPayload,
    int numIterations,
    const std::vector<vector_size_t>& batchSizes) {
  const char* const payloadLabel = noPayload ? "no-payload" : "payload";

  // One key count per row type returned by smallintRowTypes(), in order.
  std::vector<int> keyCounts{1, 2, 3, 4};
  auto schemas = smallintRowTypes(noPayload);

  benchmark(
      payloadLabel, "smallint", batchSizes, schemas, keyCounts, numIterations);
}

/// Registers the SMALLINT benchmarks without payload columns for small batch
/// sizes (10 to 500).
void smallSmallint() {
  // For small datasets the iteration count needs to be large enough to ensure
  // that the benchmark runs for enough time.
  const int iterations = 100'000;
  const std::vector<vector_size_t> batchSizes{10, 50, 100, 500};
  smallint(/*noPayload=*/true, iterations, batchSizes);
}

/// Registers the SMALLINT benchmarks with payload columns for small batch
/// sizes (10 to 500), using the same iteration count as smallSmallint().
void smallSmallintWithPayload() {
  const int iterations = 100'000;
  const std::vector<vector_size_t> batchSizes{10, 50, 100, 500};
  smallint(/*noPayload=*/false, iterations, batchSizes);
}

/// Registers the SMALLINT benchmarks without payload columns for large batch
/// sizes (1'000 to 1'000'000), using 10 iterations.
void largeSmallint() {
  const int iterations = 10;
  const std::vector<vector_size_t> batchSizes{
      1'000,
      10'000,
      100'000,
      1'000'000,
  };
  smallint(/*noPayload=*/true, iterations, batchSizes);
}

/// Registers the SMALLINT benchmarks with payload columns for large batch
/// sizes (1'000 to 1'000'000), using 10 iterations.
void largeSmallintWithPayloads() {
  const int iterations = 10;
  const std::vector<vector_size_t> batchSizes{
      1'000,
      10'000,
      100'000,
      1'000'000,
  };
  smallint(/*noPayload=*/false, iterations, batchSizes);
}

private:
std::vector<std::unique_ptr<TestCase>> testCases_;
memory::MemoryPool* pool_;
Expand All @@ -316,7 +383,11 @@ int main(int argc, char** argv) {
bm.largeBigintWithPayloads();
bm.smallBigintWithPayload();
bm.largeVarchar();
folly::runBenchmarks();
bm.smallSmallint();
bm.largeSmallint();
bm.smallSmallintWithPayload();
bm.largeSmallintWithPayloads();

folly::runBenchmarks();
return 0;
}
24 changes: 23 additions & 1 deletion velox/exec/prefixsort/PrefixSortEncoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class PrefixSortEncoder {
}

/// @tparam T Type of value. Supported type are: uint64_t, int64_t, uint32_t,
/// int32_t, float, double, Timestamp. TODO Add support for int16_t, uint16_t.
/// int32_t, int16_t, uint16_t, float, double, Timestamp.
template <typename T>
FOLLY_ALWAYS_INLINE void encodeNoNulls(T value, char* dest) const;

Expand All @@ -71,6 +71,9 @@ class PrefixSortEncoder {
FOLLY_ALWAYS_INLINE static std::optional<uint32_t> encodedSize(
TypeKind typeKind) {
switch ((typeKind)) {
case ::facebook::velox::TypeKind::SMALLINT: {
return 3;
}
case ::facebook::velox::TypeKind::INTEGER: {
return 5;
}
Expand Down Expand Up @@ -147,6 +150,25 @@ FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls(
encodeNoNulls((uint64_t)(value ^ (1ull << 63)), dest);
}

/// Encodes an unsigned 16-bit value into the two bytes at 'dest'.
///
/// The value is byte-swapped before being stored. When the encoder is not
/// ascending, the bitwise complement of the swapped value is stored instead.
///
/// @param value Value to encode.
/// @param dest Destination buffer; two bytes are written through a uint16_t
/// pointer.
/// NOTE(review): the store assumes 'dest' may be written as a uint16_t;
/// confirm alignment expectations against the callers.
template <>
FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls(
    uint16_t value,
    char* dest) const {
  const uint16_t swapped = __builtin_bswap16(value);
  // Inverting every bit of the encoded form reverses its ordering for
  // descending sorts.
  *reinterpret_cast<uint16_t*>(dest) =
      ascending_ ? swapped : static_cast<uint16_t>(~swapped);
}

/// Encodes a signed 16-bit value into the two bytes at 'dest'.
///
/// The sign bit is flipped so that the signed range maps onto the unsigned
/// range in the same order, then the uint16_t encoding is applied.
///
/// @param value Value to encode.
/// @param dest Destination buffer passed on to the uint16_t overload.
template <>
FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls(
    int16_t value,
    char* dest) const {
  const auto signFlipped = static_cast<uint16_t>(value ^ (1u << 15));
  encodeNoNulls(signFlipped, dest);
}

namespace detail {
/// Convert double to a uint64_t, their value comparison semantics remain
/// consistent.
Expand Down
17 changes: 17 additions & 0 deletions velox/exec/prefixsort/tests/PrefixEncoderTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,17 @@ TEST_F(PrefixEncoderTest, encode) {
testEncode<int32_t>(0x11223344, (char*)&ascExpected, (char*)&descExpected);
}

{
uint16_t ascExpected = 0x2211;
uint16_t descExpected = 0xddee;
testEncode<uint16_t>(0x1122, (char*)&ascExpected, (char*)&descExpected);
}
{
int16_t ascExpected = 0x2291;
int16_t descExpected = 0xdd6e;
testEncode<int16_t>(0x1122, (char*)&ascExpected, (char*)&descExpected);
}

{
uint32_t ascExpected = 0x0050c3c7;
uint32_t descExpected = 0xffaf3c38;
Expand Down Expand Up @@ -330,13 +341,19 @@ TEST_F(PrefixEncoderTest, encode) {
// Runs the testCompare helper once for each value type listed below,
// including the 16-bit integer types (uint16_t, int16_t).
// NOTE(review): testCompare is defined outside this view; presumably it checks
// that comparing encoded values agrees with comparing the raw values -
// confirm against the helper.
TEST_F(PrefixEncoderTest, compare) {
// Unsigned integer types.
testCompare<uint64_t>();
testCompare<uint32_t>();
testCompare<uint16_t>();
// Signed integer types.
testCompare<int64_t>();
testCompare<int32_t>();
testCompare<int16_t>();
// Floating-point and Timestamp types.
testCompare<float>();
testCompare<double>();
testCompare<Timestamp>();
}

// Runs the testFuzz helper instantiated for TypeKind::SMALLINT.
// NOTE(review): testFuzz is defined outside this view; presumably it encodes
// randomly generated values and verifies their ordering - confirm.
TEST_F(PrefixEncoderTest, fuzzySmallInt) {
testFuzz<TypeKind::SMALLINT>();
}

// Runs the testFuzz helper instantiated for TypeKind::INTEGER.
// NOTE(review): testFuzz is defined outside this view; presumably it encodes
// randomly generated values and verifies their ordering - confirm.
TEST_F(PrefixEncoderTest, fuzzyInteger) {
testFuzz<TypeKind::INTEGER>();
}
Expand Down

0 comments on commit 98bbb73

Please sign in to comment.