From fb031b0632a00db1e1ec952bfdbd0c1461003c34 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Wed, 9 Aug 2023 09:07:59 +0200
Subject: [PATCH] Merge pull request #39 from ClickHouse/count-from-record-batch

Allow to get number of rows in record batch

(cherry picked from commit 1d93838f69a802639ca144ea5704a98e2481810d)

Doc comments for RecordBatchCountRows were added on top of the cherry-pick.
---
 cpp/src/arrow/ipc/reader.cc | 19 +++++++++++++++++++
 cpp/src/arrow/ipc/reader.h  |  6 ++++++
 2 files changed, 25 insertions(+)

diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index d272c78560f82..a34517c660108 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -1369,6 +1369,25 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
     return total;
   }
 
+  // Returns the length stored in the RecordBatch header of batch i's message.
+  // The index is validated with DCHECKs only, so callers must pass a valid i.
+  Result<int64_t> RecordBatchCountRows(int i) override {
+    DCHECK_GE(i, 0);
+    DCHECK_LT(i, num_record_batches());
+    ARROW_ASSIGN_OR_RAISE(auto outer_message,
+                          ReadMessageFromBlock(GetRecordBatchBlock(i)));
+    auto metadata = outer_message->metadata();
+    const flatbuf::Message* message = nullptr;
+    RETURN_NOT_OK(
+        internal::VerifyMessage(metadata->data(), metadata->size(), &message));
+    auto batch = message->header_as_RecordBatch();
+    if (batch == nullptr) {
+      return Status::IOError(
+          "Header-type of flatbuffer-encoded Message is not RecordBatch.");
+    }
+    return batch->length();
+  }
+
   Status Open(const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
               const IpcReadOptions& options) {
     owned_file_ = file;
diff --git a/cpp/src/arrow/ipc/reader.h b/cpp/src/arrow/ipc/reader.h
index 888f59a627771..2e876d65f567f 100644
--- a/cpp/src/arrow/ipc/reader.h
+++ b/cpp/src/arrow/ipc/reader.h
@@ -203,6 +203,12 @@ class ARROW_EXPORT RecordBatchFileReader
   /// \brief Computes the total number of rows in the file.
   virtual Result<int64_t> CountRows() = 0;
 
+  /// \brief Get the number of rows in a single record batch of the file.
+  ///
+  /// \param[in] i the index of the record batch, in [0, num_record_batches())
+  /// \return the number of rows recorded for that batch
+  virtual Result<int64_t> RecordBatchCountRows(int i) = 0;
+
   /// \brief Begin loading metadata for the desired batches into memory.
   ///
   /// This method will also begin loading all dictionaries messages into memory.