From 802ff815dd457d1a5376ed663094a6741ddf3890 Mon Sep 17 00:00:00 2001
From: avogar
Date: Wed, 9 Aug 2023 07:05:31 +0000
Subject: [PATCH] Allow to get number of rows in record batch

---
 cpp/src/arrow/ipc/reader.cc | 17 +++++++++++++++++
 cpp/src/arrow/ipc/reader.h  |  6 ++++++
 2 files changed, 23 insertions(+)

diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index a1b17afaaf9f4..917e48ddd5dd0 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -1240,6 +1240,23 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
     return total;
   }
 
+  Result<int64_t> RecordBatchCountRows(int i) override {
+    DCHECK_GE(i, 0);
+    DCHECK_LT(i, num_record_batches());
+    ARROW_ASSIGN_OR_RAISE(auto outer_message,
+                          ReadMessageFromBlock(GetRecordBatchBlock(i)));
+    auto metadata = outer_message->metadata();
+    const flatbuf::Message* message = nullptr;
+    RETURN_NOT_OK(
+        internal::VerifyMessage(metadata->data(), metadata->size(), &message));
+    auto batch = message->header_as_RecordBatch();
+    if (batch == nullptr) {
+      return Status::IOError(
+          "Header-type of flatbuffer-encoded Message is not RecordBatch.");
+    }
+    return batch->length();
+  }
+
   Status Open(const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
               const IpcReadOptions& options) {
     owned_file_ = file;
diff --git a/cpp/src/arrow/ipc/reader.h b/cpp/src/arrow/ipc/reader.h
index ad7969b31c991..6e28433e01c26 100644
--- a/cpp/src/arrow/ipc/reader.h
+++ b/cpp/src/arrow/ipc/reader.h
@@ -203,6 +203,12 @@ class ARROW_EXPORT RecordBatchFileReader
   /// \brief Computes the total number of rows in the file.
   virtual Result<int64_t> CountRows() = 0;
 
+  /// \brief Return the number of rows in the i-th record batch.
+  ///
+  /// \param[in] i the index of the record batch; 0 <= i < num_record_batches()
+  /// \return the length stored in the batch's RecordBatch message header, or an error
+  virtual Result<int64_t> RecordBatchCountRows(int i) = 0;
+
   /// \brief Begin loading metadata for the desired batches into memory.
   ///
   /// This method will also begin loading all dictionaries messages into memory.