Skip to content

Commit

Permalink
Use two-dimensional vector to store delete files
Browse files: browse the repository at this point in the history
  • Loading branch information
liujiayi771 committed Mar 13, 2024
1 parent 032bf3e commit d2ebfb7
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 7 deletions.
4 changes: 1 addition & 3 deletions cpp/velox/compute/WholeStageResultIterator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,7 @@ WholeStageResultIterator::WholeStageResultIterator(
if (auto icebergSplitInfo = std::dynamic_pointer_cast<IcebergSplitInfo>(scanInfo)) {
// Set Iceberg split.
std::unordered_map<std::string, std::string> customSplitInfo{{"table_format", "hive-iceberg"}};
auto deleteFilesFind = icebergSplitInfo->deleteFilesMap.find(paths[idx]);
auto deleteFiles = deleteFilesFind != icebergSplitInfo->deleteFilesMap.end() ? deleteFilesFind->second
: std::vector<IcebergDeleteFile>{};
auto deleteFiles = icebergSplitInfo->deleteFilesVec[idx];
split = std::make_shared<velox::connector::hive::iceberg::HiveIcebergSplit>(
kHiveConnectorId,
paths[idx],
Expand Down
5 changes: 4 additions & 1 deletion cpp/velox/compute/iceberg/IcebergPlanConverter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,10 @@ std::shared_ptr<IcebergSplitInfo> IcebergPlanConverter::parseIcebergSplitInfo(
deletes.emplace_back(IcebergDeleteFile(
fileContent, deleteFile.filepath(), format, deleteFile.recordcount(), deleteFile.filesize()));
}
icebergSplitInfo->deleteFilesMap[file.uri_file()] = std::move(deletes);
icebergSplitInfo->deleteFilesVec.emplace_back(deletes);
} else {
// Add an empty delete files vector to indicate that this data file has no delete file.
icebergSplitInfo->deleteFilesVec.emplace_back(std::vector<IcebergDeleteFile>{});
}

return icebergSplitInfo;
Expand Down
7 changes: 5 additions & 2 deletions cpp/velox/compute/iceberg/IcebergPlanConverter.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,12 @@ using namespace facebook::velox::connector::hive::iceberg;

namespace gluten {
/// Split information for an Iceberg scan: everything carried by the base
/// SplitInfo plus the delete files that belong to each data file.
struct IcebergSplitInfo : SplitInfo {
  /// Delete files for each data file, stored as one inner vector per data file.
  /// Entries are looked up by the same index as `paths`, so a data file without
  /// delete files still needs an (empty) entry to keep the two in step.
  std::vector<std::vector<IcebergDeleteFile>> deleteFilesVec;

  /// Copies the base split information and pre-allocates room for one
  /// delete-file list per data file path.
  IcebergSplitInfo(const SplitInfo& splitInfo) : SplitInfo(splitInfo) {
    // Reserve by element count, not capacity(): capacity may exceed the number
    // of paths actually present and would over-allocate.
    deleteFilesVec.reserve(splitInfo.paths.size());
  }
};

class IcebergPlanConverter {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ class VeloxIcebergSuite extends WholeStageTransformerSuite {
|update iceberg_mor_tb set name = 'new_a2' where id = 'a2';
|""".stripMargin
)

// Merge into.
spark.sql(
"""
Expand Down

0 comments on commit d2ebfb7

Please sign in to comment.