From 98bbb73ec9ee73853cc74668040990b5f85f3c92 Mon Sep 17 00:00:00 2001 From: kevincmchen Date: Thu, 12 Sep 2024 16:10:26 -0700 Subject: [PATCH] Add support for smallint to PrefixSort (#10946) Summary: According to the benchmark, for data sets larger than 0.5k, PrefixSort outperforms std::sort with performance improvements ranging from approximately 250% to over 500%. Here's a summary of the benchmark results: | Dataset Size | PrefixSort Improvement (No Payload) |PrefixSort Improvement(With Payload) | |--------------|-------------------------------------|-------------------------------------| | 0.5k | 248.97% - 287.43% | 249.71% - 289.74% | | 1k | 214.44% - 310.92% | 215.03% - 315.14% | | 10k | 216.21% - 255.38% | 217.88% - 256.88% | | 100k | 279.81% - 318.26% | 284.89% - 295.21% | | 1000k | 304.36% - 351.31% | 454.04% - 514.28% | follow-up https://github.com/facebookincubator/velox/issues/8350 Part of https://github.com/facebookincubator/velox/issues/6766 Pull Request resolved: https://github.com/facebookincubator/velox/pull/10946 Reviewed By: Yuhta Differential Revision: D62373593 Pulled By: mbasmanova fbshipit-source-id: b8594e05cc6aee736d09db1695db770b84e9d4bd --- velox/exec/PrefixSort.cpp | 4 + velox/exec/benchmarks/PrefixSortBenchmark.cpp | 73 ++++++++++++++++++- velox/exec/prefixsort/PrefixSortEncoder.h | 24 +++++- .../prefixsort/tests/PrefixEncoderTest.cpp | 17 +++++ 4 files changed, 116 insertions(+), 2 deletions(-) diff --git a/velox/exec/PrefixSort.cpp b/velox/exec/PrefixSort.cpp index 7fc0dff392c4..5d58ee9b032e 100644 --- a/velox/exec/PrefixSort.cpp +++ b/velox/exec/PrefixSort.cpp @@ -51,6 +51,10 @@ FOLLY_ALWAYS_INLINE void extractRowColumnToPrefix( char* const row, char* const prefix) { switch (typeKind) { + case TypeKind::SMALLINT: { + encodeRowColumn(prefixSortLayout, index, rowColumn, row, prefix); + return; + } case TypeKind::INTEGER: { encodeRowColumn(prefixSortLayout, index, rowColumn, row, prefix); return; diff --git 
a/velox/exec/benchmarks/PrefixSortBenchmark.cpp b/velox/exec/benchmarks/PrefixSortBenchmark.cpp index bb81eaa4c6aa..0af6a9f90bdf 100644 --- a/velox/exec/benchmarks/PrefixSortBenchmark.cpp +++ b/velox/exec/benchmarks/PrefixSortBenchmark.cpp @@ -238,6 +238,30 @@ class PrefixSortBenchmark { } } + std::vector smallintRowTypes(bool noPayload) { + if (noPayload) { + return { + ROW({SMALLINT()}), + ROW({SMALLINT(), SMALLINT()}), + ROW({SMALLINT(), SMALLINT(), SMALLINT()}), + ROW({SMALLINT(), SMALLINT(), SMALLINT(), SMALLINT()}), + }; + } else { + return { + ROW({SMALLINT(), VARCHAR(), VARCHAR()}), + ROW({SMALLINT(), SMALLINT(), VARCHAR(), VARCHAR()}), + ROW({SMALLINT(), SMALLINT(), SMALLINT(), VARCHAR(), VARCHAR()}), + ROW( + {SMALLINT(), + SMALLINT(), + SMALLINT(), + SMALLINT(), + VARCHAR(), + VARCHAR()}), + }; + } + } + void bigint( bool noPayload, int numIterations, @@ -296,6 +320,49 @@ class PrefixSortBenchmark { "no-payloads", "varchar", batchSizes, rowTypes, numKeys, iterations); } + void smallint( + bool noPayload, + int numIterations, + const std::vector& batchSizes) { + std::vector rowTypes = smallintRowTypes(noPayload); + std::vector numKeys = {1, 2, 3, 4}; + benchmark( + noPayload ? "no-payload" : "payload", + "smallint", + batchSizes, + rowTypes, + numKeys, + numIterations); + } + + void smallSmallint() { + // For small datasets, iterations need to be large enough to ensure that the + // benchmark runs for enough time. 
+ const auto iterations = 100'000; + const std::vector batchSizes = {10, 50, 100, 500}; + smallint(true, iterations, batchSizes); + } + + void smallSmallintWithPayload() { + const auto iterations = 100'000; + const std::vector batchSizes = {10, 50, 100, 500}; + smallint(false, iterations, batchSizes); + } + + void largeSmallint() { + const auto iterations = 10; + const std::vector batchSizes = { + 1'000, 10'000, 100'000, 1'000'000}; + smallint(true, iterations, batchSizes); + } + + void largeSmallintWithPayloads() { + const auto iterations = 10; + const std::vector batchSizes = { + 1'000, 10'000, 100'000, 1'000'000}; + smallint(false, iterations, batchSizes); + } + private: std::vector> testCases_; memory::MemoryPool* pool_; @@ -316,7 +383,11 @@ int main(int argc, char** argv) { bm.largeBigintWithPayloads(); bm.smallBigintWithPayload(); bm.largeVarchar(); - folly::runBenchmarks(); + bm.smallSmallint(); + bm.largeSmallint(); + bm.smallSmallintWithPayload(); + bm.largeSmallintWithPayloads(); + folly::runBenchmarks(); return 0; } diff --git a/velox/exec/prefixsort/PrefixSortEncoder.h b/velox/exec/prefixsort/PrefixSortEncoder.h index 1323c43a4eb9..dec4b6166a4f 100644 --- a/velox/exec/prefixsort/PrefixSortEncoder.h +++ b/velox/exec/prefixsort/PrefixSortEncoder.h @@ -54,7 +54,7 @@ class PrefixSortEncoder { } /// @tparam T Type of value. Supported type are: uint64_t, int64_t, uint32_t, - /// int32_t, float, double, Timestamp. TODO Add support for int16_t, uint16_t. + /// int32_t, int16_t, uint16_t, float, double, Timestamp. 
template FOLLY_ALWAYS_INLINE void encodeNoNulls(T value, char* dest) const; @@ -71,6 +71,9 @@ class PrefixSortEncoder { FOLLY_ALWAYS_INLINE static std::optional encodedSize( TypeKind typeKind) { switch ((typeKind)) { + case ::facebook::velox::TypeKind::SMALLINT: { + return 3; + } case ::facebook::velox::TypeKind::INTEGER: { return 5; } @@ -147,6 +150,25 @@ FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls( encodeNoNulls((uint64_t)(value ^ (1ull << 63)), dest); } +/// Logic is the same as for int32_t. +template <> +FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls( + uint16_t value, + char* dest) const { + auto& v = *reinterpret_cast(dest); + v = __builtin_bswap16(value); + if (!ascending_) { + v = ~v; + } +} + +template <> +FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls( + int16_t value, + char* dest) const { + encodeNoNulls(static_cast(value ^ (1u << 15)), dest); +} + namespace detail { /// Convert double to a uint64_t, their value comparison semantics remain /// consistent. 
diff --git a/velox/exec/prefixsort/tests/PrefixEncoderTest.cpp b/velox/exec/prefixsort/tests/PrefixEncoderTest.cpp index 11efd8ebc8a0..7f25b4d5cae2 100644 --- a/velox/exec/prefixsort/tests/PrefixEncoderTest.cpp +++ b/velox/exec/prefixsort/tests/PrefixEncoderTest.cpp @@ -303,6 +303,17 @@ TEST_F(PrefixEncoderTest, encode) { testEncode(0x11223344, (char*)&ascExpected, (char*)&descExpected); } + { + uint16_t ascExpected = 0x2211; + uint16_t descExpected = 0xddee; + testEncode(0x1122, (char*)&ascExpected, (char*)&descExpected); + } + { + int16_t ascExpected = 0x2291; + int16_t descExpected = 0xdd6e; + testEncode(0x1122, (char*)&ascExpected, (char*)&descExpected); + } + { uint32_t ascExpected = 0x0050c3c7; uint32_t descExpected = 0xffaf3c38; @@ -330,13 +341,19 @@ TEST_F(PrefixEncoderTest, encode) { TEST_F(PrefixEncoderTest, compare) { testCompare(); testCompare(); + testCompare(); testCompare(); testCompare(); + testCompare(); testCompare(); testCompare(); testCompare(); } +TEST_F(PrefixEncoderTest, fuzzySmallInt) { + testFuzz(); +} + TEST_F(PrefixEncoderTest, fuzzyInteger) { testFuzz(); }