From 21a89cfd65632971cf7d974e3ce9030c5689f85e Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 1 Mar 2022 14:11:47 +0300 Subject: [PATCH] Merge pull request #9 from taiyang-li/raw_orc_reader Add interface to get raw orc reader from adapters (cherry picked from commit ce6b7af516cff9b106e0f7b1c30628f18e7a6169) --- cpp/src/arrow/adapters/orc/adapter.cc | 9 +++++++++ cpp/src/arrow/adapters/orc/adapter.h | 13 +++++++++++++ 2 files changed, 22 insertions(+) diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc index 98784450b3cce..14340ac285e28 100644 --- a/cpp/src/arrow/adapters/orc/adapter.cc +++ b/cpp/src/arrow/adapters/orc/adapter.cc @@ -222,6 +222,11 @@ class ORCFileReader::Impl { return Init(); } + virtual liborc::Reader* GetRawORCReader() { + return reader_.get(); + } + + Status Init() { int64_t nstripes = reader_->getNumberOfStripes(); stripes_.resize(static_cast(nstripes)); @@ -548,6 +553,10 @@ class ORCFileReader::Impl { return NextStripeReader(batch_size, empty_vec); } + liborc::Reader* ORCFileReader::GetRawORCReader() { + return impl_->GetRawORCReader(); + } + private: MemoryPool* pool_; std::unique_ptr reader_; diff --git a/cpp/src/arrow/adapters/orc/adapter.h b/cpp/src/arrow/adapters/orc/adapter.h index 4ffff81f355f1..41f53d3474d53 100644 --- a/cpp/src/arrow/adapters/orc/adapter.h +++ b/cpp/src/arrow/adapters/orc/adapter.h @@ -53,6 +53,19 @@ class ARROW_EXPORT ORCFileReader { public: ~ORCFileReader(); + /// \brief Creates a new ORC reader. + /// + /// \param[in] file the data source + /// \param[in] pool a MemoryPool to use for buffer allocations + /// \param[out] reader the returned reader object + /// \return Status + ARROW_DEPRECATED("Deprecated in 6.0.0. Use Result-returning overload instead.") + static Status Open(const std::shared_ptr& file, MemoryPool* pool, + std::unique_ptr* reader); + + /// \brief Get ORC reader from inside. + liborc::Reader* GetRawORCReader(); + /// \brief Creates a new ORC reader /// /// \param[in] file the data source