diff --git a/velox/exec/PrefixSort.cpp b/velox/exec/PrefixSort.cpp index 8f350aae15f3..8488194c72e8 100644 --- a/velox/exec/PrefixSort.cpp +++ b/velox/exec/PrefixSort.cpp @@ -81,6 +81,11 @@ FOLLY_ALWAYS_INLINE void extractRowColumnToPrefix( prefixSortLayout, index, rowColumn, row, prefixBuffer); return; } + case TypeKind::HUGEINT: { /* HUGEINT keys are written into the prefix buffer through encodeRowColumn, with the same arguments as the case above. */ + encodeRowColumn( + prefixSortLayout, index, rowColumn, row, prefixBuffer); + return; + } default: VELOX_UNSUPPORTED( "prefix-sort does not support type kind: {}", diff --git a/velox/exec/benchmarks/PrefixSortBenchmark.cpp b/velox/exec/benchmarks/PrefixSortBenchmark.cpp index 962d63a8740e..692d4f72926b 100644 --- a/velox/exec/benchmarks/PrefixSortBenchmark.cpp +++ b/velox/exec/benchmarks/PrefixSortBenchmark.cpp @@ -307,6 +307,21 @@ class PrefixSortBenchmark { bigint(false, iterations, batchSizes); } + void hugeInt() { /* Runs the "no-payloads" benchmark with 1 to 4 DECIMAL sort keys. Every precision used is above 18, presumably so that each key is stored as HUGEINT (confirm). */ + const auto iterations = 10; + const std::vector batchSizes = { + 1'000, 10'000, 100'000, 1'000'000}; + std::vector rowTypes = { + ROW({DECIMAL(23, 2)}), + ROW({DECIMAL(30, 2), DECIMAL(32, 5)}), + ROW({DECIMAL(19, 5), DECIMAL(34, 8), DECIMAL(38, 2)}), + ROW({DECIMAL(30, 2), DECIMAL(24, 3), DECIMAL(32, 5), DECIMAL(34, 3)}), + }; + std::vector numKeys = {1, 2, 3, 4}; + benchmark( + "no-payloads", "hugeint", batchSizes, rowTypes, numKeys, iterations); + } + void largeVarchar() { const auto iterations = 10; const std::vector batchSizes = { @@ -382,6 +397,7 @@ int main(int argc, char** argv) { bm.smallBigint(); bm.largeBigint(); + bm.hugeInt(); bm.largeBigintWithPayloads(); bm.smallBigintWithPayload(); bm.largeVarchar(); diff --git a/velox/exec/prefixsort/PrefixSortEncoder.h b/velox/exec/prefixsort/PrefixSortEncoder.h index 40f8cc4f8dff..c4d356ec53d4 100644 --- a/velox/exec/prefixsort/PrefixSortEncoder.h +++ b/velox/exec/prefixsort/PrefixSortEncoder.h @@ -88,6 +88,9 @@ class PrefixSortEncoder { case ::facebook::velox::TypeKind::TIMESTAMP: { return 17; } + case ::facebook::velox::TypeKind::HUGEINT: { + return 17; /* Same value as the TIMESTAMP case above; presumably 16 value bytes plus 1 null-flag byte (confirm). */ + } default: 
return std::nullopt; } @@ -168,6 +171,14 @@ FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls( encodeNoNulls(static_cast(value ^ (1u << 15)), dest); } +template <> +FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls( + int128_t value, + char* dest) const { /* Writes the encoding of HugeInt::upper(value) at dest, then the encoding of HugeInt::lower(value) sizeof(int64_t) bytes further on. */ + encodeNoNulls(HugeInt::upper(value), dest); + encodeNoNulls(HugeInt::lower(value), dest + sizeof(int64_t)); +} + namespace detail { /// Convert double to a uint64_t, their value comparison semantics remain /// consistent. diff --git a/velox/exec/prefixsort/tests/PrefixEncoderTest.cpp b/velox/exec/prefixsort/tests/PrefixEncoderTest.cpp index 7f25b4d5cae2..dc21fabea7c1 100644 --- a/velox/exec/prefixsort/tests/PrefixEncoderTest.cpp +++ b/velox/exec/prefixsort/tests/PrefixEncoderTest.cpp @@ -326,6 +326,15 @@ TEST_F(PrefixEncoderTest, encode) { testEncode(100000.00, (char*)&ascExpected, (char*)&descExpected); } + { /* Expected bytes for HugeInt::build(10, 200): 10 sits in byte 7 and 200 (written as -56) in byte 15, with only the top bit set in byte 0 of the ascending form. Each descending byte is the bitwise complement of the ascending one. */ + char ascExpected[16] = { + -128, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, -56}; + char descExpected[16] = { + 127, -1, -1, -1, -1, -1, -1, -11, -1, -1, -1, -1, -1, -1, -1, 55}; + int128_t value = HugeInt::build(10, 200); + testEncode(value, (char*)ascExpected, (char*)descExpected); + } + { Timestamp value = Timestamp(0x000000011223344, 0x000000011223344); uint64_t ascExpected[2]; @@ -345,6 +354,7 @@ TEST_F(PrefixEncoderTest, compare) { testCompare(); testCompare(); testCompare(); + testCompare(); testCompare(); testCompare(); testCompare(); @@ -362,6 +372,10 @@ TEST_F(PrefixEncoderTest, fuzzyBigint) { testFuzz(); } +TEST_F(PrefixEncoderTest, fuzzyHugeInt) { + testFuzz(); +} + TEST_F(PrefixEncoderTest, fuzzyReal) { testFuzz(); } diff --git a/velox/exec/tests/PrefixSortTest.cpp b/velox/exec/tests/PrefixSortTest.cpp index b5bf56fa70b4..248d1473bd0d 100644 --- a/velox/exec/tests/PrefixSortTest.cpp +++ b/velox/exec/tests/PrefixSortTest.cpp @@ -159,6 +159,12 @@ TEST_F(PrefixSortTest, singleKey) { makeFlatVector({5, 4, 3, 2, 1}), makeFlatVector({5, 4, 3, 2, 1}), makeFlatVector({5, 4, 3, 2, 
1}), + makeFlatVector( /* The literal 5 plus four values built with HugeInt::parse; the two 20-digit magnitudes exceed the int64 range. */ + {5, + HugeInt::parse("1234567"), + HugeInt::parse("12345678901234567890"), + HugeInt::parse("12345679"), + HugeInt::parse("-12345678901234567890")}), makeFlatVector({5.5, 4.4, 3.3, 2.2, 1.1}), makeFlatVector({5.5, 4.4, 3.3, 2.2, 1.1}), makeFlatVector( @@ -186,6 +192,12 @@ TEST_F(PrefixSortTest, singleKeyWithNulls) { makeNullableFlatVector({5, 4, std::nullopt, 2, 1}), makeNullableFlatVector({5, 4, std::nullopt, 2, 1}), makeNullableFlatVector({5, 4, std::nullopt, 2, 1}), + makeNullableFlatVector( + {5, + HugeInt::parse("1234567"), + std::nullopt, + HugeInt::parse("12345679"), + HugeInt::parse("-12345678901234567890")}), makeNullableFlatVector({5.5, 4.4, std::nullopt, 2.2, 1.1}), makeNullableFlatVector({5.5, 4.4, std::nullopt, 2.2, 1.1}), makeNullableFlatVector( @@ -237,7 +249,8 @@ TEST_F(PrefixSortTest, fuzz) { TINYINT(), SMALLINT(), BIGINT(), - HUGEINT(), + DECIMAL(12, 2), + DECIMAL(25, 6), /* NOTE(review): presumably precision 12 exercises a 64-bit decimal and precision 25 a 128-bit one - confirm. */ REAL(), DOUBLE(), TIMESTAMP(), @@ -260,7 +273,8 @@ TEST_F(PrefixSortTest, fuzzMulti) { TINYINT(), SMALLINT(), BIGINT(), - HUGEINT(), + DECIMAL(12, 2), + DECIMAL(25, 6), REAL(), DOUBLE(), TIMESTAMP(),