diff --git a/velox/dwio/parquet/reader/PageReader.cpp b/velox/dwio/parquet/reader/PageReader.cpp index cf46fdb58184..c185358470e6 100644 --- a/velox/dwio/parquet/reader/PageReader.cpp +++ b/velox/dwio/parquet/reader/PageReader.cpp @@ -719,8 +719,14 @@ void PageReader::skip(int64_t numRows) { } firstUnvisited_ += numRows; + if (toSkip == 0) { + return; + } // Skip nulls toSkip = skipNulls(toSkip); + if (toSkip == 0) { + return; + } // Skip the decoder if (isDictionary()) { diff --git a/velox/dwio/parquet/tests/examples/struct_of_array.parquet b/velox/dwio/parquet/tests/examples/struct_of_array.parquet new file mode 100644 index 000000000000..21dd6dcf8837 Binary files /dev/null and b/velox/dwio/parquet/tests/examples/struct_of_array.parquet differ diff --git a/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp b/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp index 4261ee702249..476272aac692 100644 --- a/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp +++ b/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp @@ -370,7 +370,6 @@ TEST_F(ParquetTableScanTest, decimalSubfieldFilter) { "Scalar function signature is not supported: eq(DECIMAL(5, 2), DECIMAL(5, 1))"); } -// Core dump is fixed. TEST_F(ParquetTableScanTest, map) { auto vector = makeMapVector({{{"name", "gluten"}}}); @@ -399,7 +398,6 @@ TEST_F(ParquetTableScanTest, nullMap) { assertSelectWithFilter({"i", "c"}, {}, "", "SELECT i, c FROM tmp"); } -// Core dump is fixed. TEST_F(ParquetTableScanTest, singleRowStruct) { auto vector = makeArrayVector({{}}); loadData( @@ -414,7 +412,6 @@ TEST_F(ParquetTableScanTest, singleRowStruct) { assertSelectWithFilter({"s"}, {}, "", "SELECT (0, 1)"); } -// Core dump and incorrect result are fixed. TEST_F(ParquetTableScanTest, array) { auto vector = makeArrayVector({}); loadData( @@ -528,6 +525,21 @@ TEST_F(ParquetTableScanTest, reqArrayLegacy) { "SELECT UNNEST(array[array['a', 'b'], array[], array['c', 'd']])"); } +TEST_F(ParquetTableScanTest, filterOnNestedArray) { + loadData( + getExampleFilePath("struct_of_array.parquet"), + ROW({"struct"}, + {ROW({"a0", "a1"}, {ARRAY(VARCHAR()), ARRAY(INTEGER())})}), + makeRowVector( + {"unused"}, + { + makeFlatVector({}), + })); + + assertSelectWithFilter( + {"struct"}, {}, "struct.a0 is null", "SELECT ROW(NULL, NULL)"); +} + TEST_F(ParquetTableScanTest, readAsLowerCase) { auto plan = PlanBuilder(pool_.get()) .tableScan(ROW({"a"}, {BIGINT()}), {}, "")