Skip to content

Commit

Permalink
[GLUTEN-7179][CH] Fix infinite loop with parquet column index reader (a…
Browse files Browse the repository at this point in the history
  • Loading branch information
lwz9103 authored and shamirchen committed Oct 14, 2024
1 parent eabd43a commit 438a656
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 2 deletions.
4 changes: 2 additions & 2 deletions cpp-ch/local-engine/Storages/Parquet/ColumnIndexFilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ struct DESCENDING : BoundaryOrder
{
if (lowerLeft > lowerRight)
return std::nullopt;
Int32 i = std::floor((lowerLeft + lowerRight) / 2);
Int32 i = floorMid(lowerLeft, lowerRight);
if (comparator.compareValueToMax(i) > 0)
lowerRight = upperRight = i - 1;
else if (comparator.compareValueToMin(i) < 0)
Expand All @@ -406,7 +406,7 @@ struct DESCENDING : BoundaryOrder
{
if (upperLeft > upperRight)
return std::nullopt;
Int32 i = std::ceil((upperLeft + upperRight) / 2);
Int32 i = ceilingMid(upperLeft, upperRight);
if (comparator.compareValueToMax(i) > 0)
upperRight = i - 1;
else if (comparator.compareValueToMin(i) < 0)
Expand Down
48 changes: 48 additions & 0 deletions cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,24 @@
#include <Common/BlockTypeUtils.h>
#include <Common/QueryContext.h>


# define ASSERT_DURATION_LE(secs, stmt) \
{ \
std::promise<bool> completed; \
auto stmt_future = completed.get_future(); \
std::thread( \
[&](std::promise<bool> & completed) \
{ \
stmt; \
completed.set_value(true); \
}, \
std::ref(completed)) \
.detach(); \
if (stmt_future.wait_for(std::chrono::seconds(secs)) == std::future_status::timeout) \
GTEST_FATAL_FAILURE_(" timed out (> " #secs " seconds). Check code for infinite loops"); \
}


namespace DB::ErrorCodes
{
extern const int LOGICAL_ERROR;
Expand Down Expand Up @@ -163,6 +181,13 @@ class CIBuilder
return *this;
}

CIBuilder & addSamePages(size_t num, int64_t nullCount, const std::string & min, const std::string & max)
{
for (size_t i = 0; i < num; ++i)
addPage(nullCount, min, max);
return *this;
}

parquet::ColumnIndexPtr build() const
{
const parquet::ColumnDescriptor descr(node_, /*max_definition_level=*/1, 0);
Expand All @@ -188,6 +213,13 @@ class OIBuilder
return *this;
}

OIBuilder & addSamePages(size_t num, size_t row_count)
{
for (size_t i = 0; i < num; ++i)
addPage(row_count);
return *this;
}

parquet::OffsetIndexPtr build() const
{
parquet::OffsetIndexBuilderPtr builder = parquet::OffsetIndexBuilder::Make();
Expand Down Expand Up @@ -298,6 +330,13 @@ static const CIBuilder c5 = CIBuilder(PNB::optional(parquet::Type::INT64).named(
static const OIBuilder o5 = OIBuilder().addPage(1).addPage(29);
static const parquet::ColumnDescriptor d5 = c5.descr();

// GLUTEN-7179 - test customer.c_mktsegment = 'BUILDING'
static const CIBuilder c6 = CIBuilder(PNB::optional(parquet::Type::BYTE_ARRAY).as(parquet::ConvertedType::UTF8).named("c_mktsegment"))
.addSamePages(75, 0, "AUTOMOBILE", "MACHINERY")
.addPage(0, "AUTOMOBILE", "FURNITURE");
static const OIBuilder o6 = OIBuilder().addSamePages(77, 10);
static const parquet::ColumnDescriptor d6 = c6.descr();

local_engine::ColumnIndexStore buildTestColumnIndexStore()
{
local_engine::ColumnIndexStore result;
Expand All @@ -306,6 +345,7 @@ local_engine::ColumnIndexStore buildTestColumnIndexStore()
result[d3.name()] = std::move(local_engine::ColumnIndex::create(&d3, c3.build(), o3.build()));
result[d4.name()] = std::move(local_engine::ColumnIndex::create(&d4, nullptr, o3.build()));
result[d5.name()] = std::move(local_engine::ColumnIndex::create(&d5, c5.build(), o5.build()));
result[d6.name()] = std::move(local_engine::ColumnIndex::create(&d6, c6.build(), o6.build()));
return result;
}

Expand All @@ -317,6 +357,7 @@ AnotherRowType buildTestRowType()
result.emplace_back(toAnotherFieldType(d3));
result.emplace_back(toAnotherFieldType(d4));
result.emplace_back(toAnotherFieldType(d5));
result.emplace_back(toAnotherFieldType(d6));
return result;
}

Expand Down Expand Up @@ -469,6 +510,13 @@ TEST(ColumnIndex, FilteringWithAllNullPages)
// testCondition("column5 == 1234567", TOTALSIZE);
// testCondition("column5 >= 1234567", TOTALSIZE);
}

TEST(ColumnIndex, GLUTEN_7179_INFINTE_LOOP)
{
using namespace test_utils;
ASSERT_DURATION_LE(10, { testCondition("c_mktsegment = 'BUILDING'", 760); })
}

TEST(ColumnIndex, FilteringWithNotFoundColumnName)
{
using namespace test_utils;
Expand Down

0 comments on commit 438a656

Please sign in to comment.