From d1ac07936babde1c724f1b8808fb9c1d0c038846 Mon Sep 17 00:00:00 2001 From: wypb Date: Wed, 11 Sep 2024 18:15:14 -0700 Subject: [PATCH] Disable fastpath reading for some data types in ORC (#10939) Summary: As discussed in https://github.com/prestodb/presto/pull/23037#issuecomment-2330705136, we need to disable fastpath reads of some ORC data types, so that we can add TPCDS related tests in the Presto native module. CC: Yuhta aditi-pandit Pull Request resolved: https://github.com/facebookincubator/velox/pull/10939 Reviewed By: Yuhta Differential Revision: D62373833 Pulled By: mbasmanova fbshipit-source-id: f38c7959ffb72c1ecbda9c7de4631dfa5ee73e39 --- .../reader/SelectiveDecimalColumnReader.h | 7 ++++ .../SelectiveIntegerDirectColumnReader.h | 6 ++- .../SelectiveStringDictionaryColumnReader.h | 7 ++++ velox/dwio/orc/test/ReaderTest.cpp | 39 ++++++++++++++++++ velox/dwio/orc/test/examples/rlev2.orc | Bin 0 -> 552 bytes 5 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 velox/dwio/orc/test/examples/rlev2.orc diff --git a/velox/dwio/dwrf/reader/SelectiveDecimalColumnReader.h b/velox/dwio/dwrf/reader/SelectiveDecimalColumnReader.h index f27bfe350ef8..53790d9ce2e6 100644 --- a/velox/dwio/dwrf/reader/SelectiveDecimalColumnReader.h +++ b/velox/dwio/dwrf/reader/SelectiveDecimalColumnReader.h @@ -33,6 +33,13 @@ class SelectiveDecimalColumnReader : public SelectiveColumnReader { DwrfParams& params, common::ScanSpec& scanSpec); + bool hasBulkPath() const override { + // Only ORC uses RLEv2 encoding. Currently, ORC decimal data does not + // support fastpath reads. When reading RLEv2-encoded decimal data + // with null, the query will fail. 
+ return version_ != velox::dwrf::RleVersion_2; + } + void seekToRowGroup(uint32_t index) override; uint64_t skip(uint64_t numValues) override; diff --git a/velox/dwio/dwrf/reader/SelectiveIntegerDirectColumnReader.h b/velox/dwio/dwrf/reader/SelectiveIntegerDirectColumnReader.h index 297ccd9afdbc..8ccfa4cc6b86 100644 --- a/velox/dwio/dwrf/reader/SelectiveIntegerDirectColumnReader.h +++ b/velox/dwio/dwrf/reader/SelectiveIntegerDirectColumnReader.h @@ -45,6 +45,7 @@ class SelectiveIntegerDirectColumnReader const bool dataVInts = stripe.getUseVInts(si); format_ = stripe.format(); + version_ = convertRleVersion(stripe.getEncoding(encodingKey).kind()); if (format_ == velox::dwrf::DwrfFormat::kDwrf) { intDecoder_ = createDirectDecoder( stripe.getStream(si, params.streamLabels().label(), true), @@ -64,7 +65,10 @@ class SelectiveIntegerDirectColumnReader } bool hasBulkPath() const override { - return true; + // Only ORC uses RLEv2 encoding. Currently, ORC integer data does not + // support fastpath reads. When reading RLEv2-encoded integer data + // with null, the query will fail. + return version_ != velox::dwrf::RleVersion_2; } void seekToRowGroup(uint32_t index) override { diff --git a/velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.h b/velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.h index 7583fe550e42..f8ee3d6705a5 100644 --- a/velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.h +++ b/velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.h @@ -32,6 +32,13 @@ class SelectiveStringDictionaryColumnReader DwrfParams& params, common::ScanSpec& scanSpec); + bool hasBulkPath() const override { + // Only ORC uses RLEv2 encoding. Currently, ORC string data does not + // support fastpath reads. When reading RLEv2-encoded string data + // with null, the query will fail. 
+ return version_ != velox::dwrf::RleVersion_2; + } + void seekToRowGroup(uint32_t index) override { SelectiveColumnReader::seekToRowGroup(index); auto positionsProvider = formatData_->as<DwrfData>().seekToRowGroup(index); diff --git a/velox/dwio/orc/test/ReaderTest.cpp b/velox/dwio/orc/test/ReaderTest.cpp index dfe35da67ac0..37ce83a568c4 100644 --- a/velox/dwio/orc/test/ReaderTest.cpp +++ b/velox/dwio/orc/test/ReaderTest.cpp @@ -238,3 +238,42 @@ TEST_F(OrcReaderTest, testOrcReadAllType) { EXPECT_EQ(structCol->toString(0, 2, ",", false), "{1, 2}"); } } + +TEST_F(OrcReaderTest, testOrcRlev2) { + google::InstallFailureSignalHandler(); + const std::string dateOrc(getExamplesFilePath("rlev2.orc")); + auto schema = + ROW({"id", "price", "name"}, {BIGINT(), DECIMAL(7, 2), VARCHAR()}); + auto spec = std::make_shared<common::ScanSpec>(""); + spec->addAllChildFields(*schema); + + dwio::common::ReaderOptions readerOpts{pool()}; + readerOpts.setScanSpec(spec); + readerOpts.setFileFormat(dwio::common::FileFormat::ORC); + + auto reader = DwrfReader::create( + createFileBufferedInput(dateOrc, readerOpts.memoryPool()), readerOpts); + + RowReaderOptions rowReaderOptions; + rowReaderOptions.setScanSpec(spec); + auto rowReader = reader->createRowReader(rowReaderOptions); + + auto batch = BaseVector::create(schema, 0, &readerOpts.memoryPool()); + while (rowReader->next(500, batch)) { + auto rowVector = batch->as<RowVector>(); + auto idCol = + rowVector->childAt(0)->loadedVector()->as<SimpleVector<int64_t>>(); + auto priceCol = + rowVector->childAt(1)->loadedVector()->as<SimpleVector<int64_t>>(); + auto nameCol = + rowVector->childAt(2)->loadedVector()->as<SimpleVector<StringView>>(); + + EXPECT_EQ(5, rowVector->size()); + EXPECT_EQ(idCol->valueAt(0), 1); + + auto priceColType = rowVector->type()->childAt(1); + EXPECT_EQ( + DecimalUtil::toString(priceCol->valueAt(0), priceColType), "111.11"); + EXPECT_EQ(nameCol->valueAt(0), "AAAA"); + } +} diff --git a/velox/dwio/orc/test/examples/rlev2.orc b/velox/dwio/orc/test/examples/rlev2.orc new file mode 100644 index
0000000000000000000000000000000000000000..bbb1c7fdafcb2e5788e73d3b9b1363e9caf3b6c0 GIT binary patch literal 552 zcmV+@0@wXdQbQ;J0OJznVrPH=AubLUAvO*s0S*Zf06YKy<5J|}WPkx75e^n30WLN} zLqk17Lm@U36B9iX696&*0OOM7VrPH=At4SHB|a_|M<5VlaRvej1poyA0RQj@002S* z1_b~B00b5Q0CRM7baVm&XCMFo1pom5=otV2&aDBQ+XCu{1h}>Z1poj61O)&9|JVrt z0KfwR0*C+rP0XDU=rYv5aANxU@;Ql zVly-})H5^`Vly!@(K9jO65?P{;^Sg*1Og!zXCRPJ008I#05y+6OT#c6#W7905Tz@j zN~s8=7e}G-pDb3^lVb;Wn!k(zl9&!ireq>-r+soiz#t}bu=HnjNK$6 zSIoIUPK@ep?Cwon=@>VgolX$JiAVhL08p<7=>xsq8{OHKf0^29!;ji@b=$BT;BpGt zYB_sbEne4Ip1;G>qewX&{s+p>gErq~y=Au&q=G5rB}_mlS;8OXrkkidy{E(0ROj}O zwzDie?lLJvCRLtU-Bh}~8u>PP=@vMj8c~3NY{#h>2F9`Pm5CIr~fDj@A01PNTFa{Agb>DUX literal 0 HcmV?d00001