Skip to content

Commit

Permalink
Report inputSizeInBytes in HiveDataSink (#11339)
Browse files Browse the repository at this point in the history
Summary: Pull Request resolved: #11339

Reviewed By: xiaoxmeng

Differential Revision: D64886686

Pulled By: kewang1024

fbshipit-source-id: 6998684539d67223dab56fa5d80b7bc5fae0497e
  • Loading branch information
kewang1024 authored and facebook-github-bot committed Oct 24, 2024
1 parent 10cdf6f commit c14040f
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 3 deletions.
5 changes: 2 additions & 3 deletions velox/connectors/hive/HiveDataSink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,7 @@ void HiveDataSink::write(size_t index, RowVectorPtr input) {
auto dataInput = makeDataInput(dataChannels_, input);

writers_[index]->write(dataInput);
writerInfo_[index]->inputSizeInBytes += dataInput->estimateFlatSize();
writerInfo_[index]->numWrittenRows += dataInput->size();
}

Expand Down Expand Up @@ -661,9 +662,7 @@ std::vector<std::string> HiveDataSink::close() {
("targetFileName", info->writerParameters.targetFileName())
("fileSize", ioStats_.at(i)->rawBytesWritten())))
("rowCount", info->numWrittenRows)
// TODO(gaoge): track and send the fields when inMemoryDataSizeInBytes
// and containsNumberedFileNames are needed at coordinator when file_renaming_enabled are turned on.
("inMemoryDataSizeInBytes", 0)
("inMemoryDataSizeInBytes", info->inputSizeInBytes)
("onDiskDataSizeInBytes", ioStats_.at(i)->rawBytesWritten())
("containsNumberedFileNames", true));
// clang-format on
Expand Down
1 change: 1 addition & 0 deletions velox/connectors/hive/HiveDataSink.h
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,7 @@ struct HiveWriterInfo {
const std::shared_ptr<memory::MemoryPool> sinkPool;
const std::shared_ptr<memory::MemoryPool> sortPool;
int64_t numWrittenRows = 0;
int64_t inputSizeInBytes = 0;
};

/// Identifies a hive writer.
Expand Down
1 change: 1 addition & 0 deletions velox/exec/tests/TableWriteTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2631,6 +2631,7 @@ TEST_P(AllTableWriterTest, tableWriteOutputCheck) {
std::filesystem::path path{writeFileFullPath};
const auto actualFileSize = fs::file_size(path);
ASSERT_EQ(obj["onDiskDataSizeInBytes"].asInt(), actualFileSize);
ASSERT_GT(obj["inMemoryDataSizeInBytes"].asInt(), 0);
ASSERT_EQ(writerInfoObj["fileSize"], actualFileSize);
if (commitStrategy_ == CommitStrategy::kNoCommit) {
ASSERT_EQ(writeFileName, targetFileName);
Expand Down

0 comments on commit c14040f

Please sign in to comment.