From 438a65685990a2f2c02fe6e5612149265eea6e3b Mon Sep 17 00:00:00 2001 From: Wenzheng Liu Date: Wed, 11 Sep 2024 10:25:47 +0800 Subject: [PATCH] [GLUTEN-7179][CH] Fix infinite loop with parquet column index reader (#7185) --- .../Storages/Parquet/ColumnIndexFilter.cpp | 4 +- .../tests/gtest_parquet_columnindex.cpp | 48 +++++++++++++++++++ 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/cpp-ch/local-engine/Storages/Parquet/ColumnIndexFilter.cpp b/cpp-ch/local-engine/Storages/Parquet/ColumnIndexFilter.cpp index 1063cba8a02f..52eb7a4f0d4c 100644 --- a/cpp-ch/local-engine/Storages/Parquet/ColumnIndexFilter.cpp +++ b/cpp-ch/local-engine/Storages/Parquet/ColumnIndexFilter.cpp @@ -393,7 +393,7 @@ struct DESCENDING : BoundaryOrder { if (lowerLeft > lowerRight) return std::nullopt; - Int32 i = std::floor((lowerLeft + lowerRight) / 2); + Int32 i = floorMid(lowerLeft, lowerRight); if (comparator.compareValueToMax(i) > 0) lowerRight = upperRight = i - 1; else if (comparator.compareValueToMin(i) < 0) @@ -406,7 +406,7 @@ struct DESCENDING : BoundaryOrder { if (upperLeft > upperRight) return std::nullopt; - Int32 i = std::ceil((upperLeft + upperRight) / 2); + Int32 i = ceilingMid(upperLeft, upperRight); if (comparator.compareValueToMax(i) > 0) upperRight = i - 1; else if (comparator.compareValueToMin(i) < 0) diff --git a/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp b/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp index e42a2a89a3c0..48ea16a4bc6f 100644 --- a/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp +++ b/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp @@ -42,6 +42,24 @@ #include #include + +# define ASSERT_DURATION_LE(secs, stmt) \ + { \ + std::promise completed; \ + auto stmt_future = completed.get_future(); \ + std::thread( \ + [&](std::promise & completed) \ + { \ + stmt; \ + completed.set_value(true); \ + }, \ + std::ref(completed)) \ + .detach(); \ + if (stmt_future.wait_for(std::chrono::seconds(secs)) == std::future_status::timeout) \ + GTEST_FATAL_FAILURE_(" timed out (> " #secs " seconds). Check code for infinite loops"); \ + } + + namespace DB::ErrorCodes { extern const int LOGICAL_ERROR; @@ -163,6 +181,13 @@ class CIBuilder return *this; } + CIBuilder & addSamePages(size_t num, int64_t nullCount, const std::string & min, const std::string & max) + { + for (size_t i = 0; i < num; ++i) + addPage(nullCount, min, max); + return *this; + } + parquet::ColumnIndexPtr build() const { const parquet::ColumnDescriptor descr(node_, /*max_definition_level=*/1, 0); @@ -188,6 +213,13 @@ class OIBuilder return *this; } + OIBuilder & addSamePages(size_t num, size_t row_count) + { + for (size_t i = 0; i < num; ++i) + addPage(row_count); + return *this; + } + parquet::OffsetIndexPtr build() const { parquet::OffsetIndexBuilderPtr builder = parquet::OffsetIndexBuilder::Make(); @@ -298,6 +330,13 @@ static const CIBuilder c5 = CIBuilder(PNB::optional(parquet::Type::INT64).named( static const OIBuilder o5 = OIBuilder().addPage(1).addPage(29); static const parquet::ColumnDescriptor d5 = c5.descr(); +// GLUTEN-7179 - test customer.c_mktsegment = 'BUILDING' +static const CIBuilder c6 = CIBuilder(PNB::optional(parquet::Type::BYTE_ARRAY).as(parquet::ConvertedType::UTF8).named("c_mktsegment")) + .addSamePages(75, 0, "AUTOMOBILE", "MACHINERY") + .addPage(0, "AUTOMOBILE", "FURNITURE"); +static const OIBuilder o6 = OIBuilder().addSamePages(77, 10); +static const parquet::ColumnDescriptor d6 = c6.descr(); + local_engine::ColumnIndexStore buildTestColumnIndexStore() { local_engine::ColumnIndexStore result; @@ -306,6 +345,7 @@ local_engine::ColumnIndexStore buildTestColumnIndexStore() result[d3.name()] = std::move(local_engine::ColumnIndex::create(&d3, c3.build(), o3.build())); result[d4.name()] = std::move(local_engine::ColumnIndex::create(&d4, nullptr, o3.build())); result[d5.name()] = std::move(local_engine::ColumnIndex::create(&d5, c5.build(), o5.build())); + result[d6.name()] = std::move(local_engine::ColumnIndex::create(&d6, c6.build(), o6.build())); return result; } @@ -317,6 +357,7 @@ AnotherRowType buildTestRowType() result.emplace_back(toAnotherFieldType(d3)); result.emplace_back(toAnotherFieldType(d4)); result.emplace_back(toAnotherFieldType(d5)); + result.emplace_back(toAnotherFieldType(d6)); return result; } @@ -469,6 +510,13 @@ TEST(ColumnIndex, FilteringWithAllNullPages) // testCondition("column5 == 1234567", TOTALSIZE); // testCondition("column5 >= 1234567", TOTALSIZE); } + +TEST(ColumnIndex, GLUTEN_7179_INFINTE_LOOP) +{ + using namespace test_utils; + ASSERT_DURATION_LE(10, { testCondition("c_mktsegment = 'BUILDING'", 760); }) +} + TEST(ColumnIndex, FilteringWithNotFoundColumnName) { using namespace test_utils;