Add Row ID column reading support (#11363)
Summary:
Pull Request resolved: #11363

Create a framework in the selective readers for generating a row number field
and compositing it with other constant fields. Then use this framework to add
support for the Row ID column. We move the row number generation logic into the
column reader during this process, so that we can filter on it if needed in the
future.
bypass-github-export-checks

Reviewed By: oerling

Differential Revision: D65028172

fbshipit-source-id: 716dbdc77301267366a10302459e105d711a92f0
Yuhta authored and facebook-github-bot committed Oct 29, 2024
1 parent 27d0527 commit 2e0acb7
Showing 27 changed files with 570 additions and 368 deletions.
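For orientation before the per-file diffs: the Row ID column added here is a composite ROW of five children, a per-file row number generated by the column reader plus four constants taken from the split (see HiveDataSource::setupRowIdColumn below). A minimal sketch of that type, assuming hypothetical field names, could look like:

```cpp
#include "velox/type/Type.h"

using namespace facebook::velox;

// Sketch only: the field names are hypothetical; the child types follow the
// BIGINT row-index check in makeScanSpec() and the constants installed by
// HiveDataSource::setupRowIdColumn() below.
auto rowIdType = ROW(
    {"row_number",        // generated per row by the column reader
     "row_group_id",      // constant: the split's file name
     "metadata_version",  // constant: RowIdProperties::metadataVersion
     "partition_id",      // constant: RowIdProperties::partitionId
     "table_guid"},       // constant: RowIdProperties::tableGuid
    {BIGINT(), VARCHAR(), BIGINT(), BIGINT(), VARCHAR()});
```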
3 changes: 2 additions & 1 deletion velox/common/base/RawVector.cpp
@@ -21,10 +21,11 @@
namespace facebook::velox {

namespace {
std::vector<int32_t> iotaData;
raw_vector<int32_t> iotaData;

bool initializeIota() {
iotaData.resize(10000);
iotaData.resize(iotaData.capacity());
std::iota(iotaData.begin(), iotaData.end(), 0);
return true;
}
8 changes: 8 additions & 0 deletions velox/connectors/hive/HiveConnectorSplit.h
@@ -36,6 +36,12 @@ struct HiveBucketConversion {
std::vector<std::unique_ptr<HiveColumnHandle>> bucketColumnHandles;
};

struct RowIdProperties {
int64_t metadataVersion;
int64_t partitionId;
std::string tableGuid;
};

struct HiveConnectorSplit : public connector::ConnectorSplit {
const std::string filePath;
dwio::common::FileFormat fileFormat;
@@ -62,6 +68,8 @@ struct HiveConnectorSplit : public connector::ConnectorSplit {
/// the file handle.
std::optional<FileProperties> properties;

std::optional<RowIdProperties> rowIdProperties;

HiveConnectorSplit(
const std::string& connectorId,
const std::string& _filePath,
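The new rowIdProperties member is what a caller fills in on a split that should produce Row ID values. A minimal sketch, assuming split is an already-constructed std::shared_ptr<HiveConnectorSplit> and using placeholder values:

```cpp
// Sketch only: 'split' is assumed to already exist; the values below are
// placeholders supplied by whoever plans the scan.
facebook::velox::connector::hive::RowIdProperties props;
props.metadataVersion = 3;
props.partitionId = 7;
props.tableGuid = "example-table-guid";
split->rowIdProperties = props;
```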
37 changes: 21 additions & 16 deletions velox/connectors/hive/HiveConnectorUtil.cpp
@@ -260,10 +260,10 @@ inline bool isSynthesizedColumn(
return infoColumns.count(name) != 0;
}

inline bool isRowIndexColumn(
bool isSpecialColumn(
const std::string& name,
std::shared_ptr<HiveColumnHandle> rowIndexColumn) {
return rowIndexColumn != nullptr && rowIndexColumn->name() == name;
const std::optional<std::string>& specialName) {
return specialName.has_value() && name == *specialName;
}

} // namespace
@@ -368,7 +368,7 @@ std::shared_ptr<common::ScanSpec> makeScanSpec(
partitionKeys,
const std::unordered_map<std::string, std::shared_ptr<HiveColumnHandle>>&
infoColumns,
const std::shared_ptr<HiveColumnHandle>& rowIndexColumn,
const SpecialColumnNames& specialColumns,
memory::MemoryPool* pool) {
auto spec = std::make_shared<common::ScanSpec>("root");
folly::F14FastMap<std::string, std::vector<const common::Subfield*>>
@@ -377,8 +377,9 @@
for (auto& [subfield, _] : filters) {
if (auto name = subfield.toString();
!isSynthesizedColumn(name, infoColumns) &&
!isRowIndexColumn(name, rowIndexColumn) &&
partitionKeys.count(name) == 0) {
VELOX_CHECK(!isSpecialColumn(name, specialColumns.rowIndex));
VELOX_CHECK(!isSpecialColumn(name, specialColumns.rowId));
filterSubfields[getColumnName(subfield)].push_back(&subfield);
}
}
@@ -387,13 +388,24 @@
for (int i = 0; i < rowType->size(); ++i) {
auto& name = rowType->nameOf(i);
auto& type = rowType->childAt(i);
if (isSpecialColumn(name, specialColumns.rowIndex)) {
VELOX_CHECK(type->isBigint());
auto* fieldSpec = spec->addField(name, i);
fieldSpec->setColumnType(common::ScanSpec::ColumnType::kRowIndex);
continue;
}
if (isSpecialColumn(name, specialColumns.rowId)) {
VELOX_CHECK(type->isRow() && type->size() == 5);
auto& rowIdType = type->asRow();
auto* fieldSpec = spec->addFieldRecursively(name, rowIdType, i);
fieldSpec->setColumnType(common::ScanSpec::ColumnType::kComposite);
fieldSpec->childByName(rowIdType.nameOf(0))
->setColumnType(common::ScanSpec::ColumnType::kRowIndex);
continue;
}
auto it = outputSubfields.find(name);
if (it == outputSubfields.end()) {
auto* fieldSpec = spec->addFieldRecursively(name, *type, i);
if (isRowIndexColumn(name, rowIndexColumn)) {
VELOX_CHECK(type->isBigint());
fieldSpec->setExplicitRowNumber(true);
}
processFieldSpec(dataColumns, type, *fieldSpec);
filterSubfields.erase(name);
continue;
@@ -409,12 +421,6 @@ std::shared_ptr<common::ScanSpec> makeScanSpec(
filterSubfields.erase(it);
}
auto* fieldSpec = spec->addField(name, i);
if (isRowIndexColumn(name, rowIndexColumn)) {
VELOX_CHECK(type->isBigint());
// Set the flag for the case that the row index column only exists in
// remaining filters.
fieldSpec->setExplicitRowNumber(true);
}
addSubfields(*type, subfieldSpecs, 1, pool, *fieldSpec);
processFieldSpec(dataColumns, type, *fieldSpec);
subfieldSpecs.clear();
@@ -448,7 +454,6 @@ std::shared_ptr<common::ScanSpec> makeScanSpec(
if (isSynthesizedColumn(name, infoColumns)) {
continue;
}
VELOX_CHECK(!isRowIndexColumn(name, rowIndexColumn));
auto fieldSpec = spec->getOrCreateChild(pair.first);
fieldSpec->addFilter(*pair.second);
}
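To make the new kRowId branch of makeScanSpec concrete, the sketch below builds the same spec subtree by hand, using only calls that appear in the diff; the column name, field names, and channel index are hypothetical.

```cpp
#include <memory>

#include "velox/dwio/common/ScanSpec.h"
#include "velox/type/Type.h"

using namespace facebook::velox;

// Sketch only: mirrors the kRowId branch of makeScanSpec() above.
auto rowIdType = ROW(
    {"row_number", "row_group_id", "metadata_version", "partition_id",
     "table_guid"},
    {BIGINT(), VARCHAR(), BIGINT(), BIGINT(), VARCHAR()});
const int channel = 0;  // the column's position in the output row type
auto spec = std::make_shared<common::ScanSpec>("root");
auto* fieldSpec = spec->addFieldRecursively("row_id", *rowIdType, channel);
fieldSpec->setColumnType(common::ScanSpec::ColumnType::kComposite);
// Child 0 is produced by the column reader as the per-file row number.
fieldSpec->childByName(rowIdType->nameOf(0))
    ->setColumnType(common::ScanSpec::ColumnType::kRowIndex);
// Children 1-4 are filled with constants from the split later, in
// HiveDataSource::setupRowIdColumn().
```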
7 changes: 6 additions & 1 deletion velox/connectors/hive/HiveConnectorUtil.h
@@ -44,6 +44,11 @@ void checkColumnNameLowerCase(

void checkColumnNameLowerCase(const core::TypedExprPtr& typeExpr);

struct SpecialColumnNames {
std::optional<std::string> rowIndex;
std::optional<std::string> rowId;
};

std::shared_ptr<common::ScanSpec> makeScanSpec(
const RowTypePtr& rowType,
const folly::F14FastMap<std::string, std::vector<const common::Subfield*>>&
@@ -54,7 +59,7 @@ std::shared_ptr<common::ScanSpec> makeScanSpec(
partitionKeys,
const std::unordered_map<std::string, std::shared_ptr<HiveColumnHandle>>&
infoColumns,
const std::shared_ptr<HiveColumnHandle>& rowIndexColumn,
const SpecialColumnNames& specialColumns,
memory::MemoryPool* pool);

void configureReaderOptions(
61 changes: 45 additions & 16 deletions velox/connectors/hive/HiveDataSource.cpp
@@ -22,7 +22,6 @@

#include "velox/common/testutil/TestValue.h"
#include "velox/connectors/hive/HiveConfig.h"
#include "velox/connectors/hive/HiveConnectorUtil.h"
#include "velox/dwio/common/ReaderFactory.h"
#include "velox/expression/FieldReference.h"

@@ -81,18 +80,21 @@ HiveDataSource::HiveDataSource(
handle,
"ColumnHandle must be an instance of HiveColumnHandle for {}",
canonicalizedName);

if (handle->columnType() == HiveColumnHandle::ColumnType::kPartitionKey) {
partitionKeys_.emplace(handle->name(), handle);
}

if (handle->columnType() == HiveColumnHandle::ColumnType::kSynthesized) {
infoColumns_.emplace(handle->name(), handle);
}

if (handle->columnType() == HiveColumnHandle::ColumnType::kRowIndex) {
VELOX_CHECK_NULL(rowIndexColumn_);
rowIndexColumn_ = handle;
switch (handle->columnType()) {
case HiveColumnHandle::ColumnType::kRegular:
break;
case HiveColumnHandle::ColumnType::kPartitionKey:
partitionKeys_.emplace(handle->name(), handle);
break;
case HiveColumnHandle::ColumnType::kSynthesized:
infoColumns_.emplace(handle->name(), handle);
break;
case HiveColumnHandle::ColumnType::kRowIndex:
specialColumns_.rowIndex = handle->name();
break;
case HiveColumnHandle::ColumnType::kRowId:
specialColumns_.rowId = handle->name();
break;
}
}

@@ -192,7 +194,7 @@ HiveDataSource::HiveDataSource(
hiveTableHandle_->dataColumns(),
partitionKeys_,
infoColumns_,
rowIndexColumn_,
specialColumns_,
pool_);
if (remainingFilter) {
metadataFilter_ = std::make_shared<common::MetadataFilter>(
@@ -257,7 +259,7 @@ std::unique_ptr<HivePartitionFunction> HiveDataSource::setupBucketConversion() {
hiveTableHandle_->dataColumns(),
partitionKeys_,
infoColumns_,
rowIndexColumn_,
specialColumns_,
pool_);
newScanSpec->moveAdaptationFrom(*scanSpec_);
scanSpec_ = std::move(newScanSpec);
@@ -266,6 +268,30 @@ std::unique_ptr<HivePartitionFunction> HiveDataSource::setupBucketConversion() {
split_->bucketConversion->tableBucketCount, std::move(bucketChannels));
}

void HiveDataSource::setupRowIdColumn() {
VELOX_CHECK(split_->rowIdProperties.has_value());
const auto& props = *split_->rowIdProperties;
auto* rowId = scanSpec_->childByName(*specialColumns_.rowId);
VELOX_CHECK_NOT_NULL(rowId);
auto& rowIdType =
readerOutputType_->findChild(*specialColumns_.rowId)->asRow();
auto rowGroupId = split_->getFileName();
rowId->childByName(rowIdType.nameOf(1))
->setConstantValue<StringView>(
StringView(rowGroupId), VARCHAR(), connectorQueryCtx_->memoryPool());
rowId->childByName(rowIdType.nameOf(2))
->setConstantValue<int64_t>(
props.metadataVersion, BIGINT(), connectorQueryCtx_->memoryPool());
rowId->childByName(rowIdType.nameOf(3))
->setConstantValue<int64_t>(
props.partitionId, BIGINT(), connectorQueryCtx_->memoryPool());
rowId->childByName(rowIdType.nameOf(4))
->setConstantValue<StringView>(
StringView(props.tableGuid),
VARCHAR(),
connectorQueryCtx_->memoryPool());
}

void HiveDataSource::addSplit(std::shared_ptr<ConnectorSplit> split) {
VELOX_CHECK_NULL(
split_,
@@ -284,12 +310,15 @@ void HiveDataSource::addSplit(std::shared_ptr<ConnectorSplit> split) {
} else {
partitionFunction_.reset();
}
if (specialColumns_.rowId.has_value()) {
setupRowIdColumn();
}

splitReader_ = createSplitReader();
// Split reader subclasses may need to use the reader options in prepareSplit
// so we initialize it beforehand.
splitReader_->configureReaderOptions(randomSkip_);
splitReader_->prepareSplit(metadataFilter_, runtimeStats_, rowIndexColumn_);
splitReader_->prepareSplit(metadataFilter_, runtimeStats_);
}

vector_size_t HiveDataSource::applyBucketConversion(
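As a worked illustration of what setupRowIdColumn produces for one split, only the row number varies from row to row while the other four children are constants. The self-contained sketch below uses placeholder values and plain C++ types rather than Velox vectors.

```cpp
#include <cstdint>
#include <string>
#include <vector>

// Illustration only: the logical tuple a Row ID column carries per row, with
// hypothetical field names and placeholder property values.
struct RowIdExample {
  int64_t rowNumber;        // generated per row by the column reader
  std::string rowGroupId;   // constant: the split's file name
  int64_t metadataVersion;  // constant: RowIdProperties::metadataVersion
  int64_t partitionId;      // constant: RowIdProperties::partitionId
  std::string tableGuid;    // constant: RowIdProperties::tableGuid
};

std::vector<RowIdExample> makeExampleRowIds(int64_t numRows) {
  std::vector<RowIdExample> out;
  for (int64_t i = 0; i < numRows; ++i) {
    // Only the row number changes within a file; setupRowIdColumn() installs
    // the remaining four children as constants taken from the split.
    out.push_back({i, "part-00000", 3, 7, "example-table-guid"});
  }
  return out;
}
```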
5 changes: 4 additions & 1 deletion velox/connectors/hive/HiveDataSource.h
@@ -20,6 +20,7 @@
#include "velox/connectors/Connector.h"
#include "velox/connectors/hive/FileHandle.h"
#include "velox/connectors/hive/HiveConnectorSplit.h"
#include "velox/connectors/hive/HiveConnectorUtil.h"
#include "velox/connectors/hive/HivePartitionFunction.h"
#include "velox/connectors/hive/SplitReader.h"
#include "velox/connectors/hive/TableHandle.h"
@@ -125,14 +126,15 @@ class HiveDataSource : public DataSource {
partitionKeys_;

std::shared_ptr<io::IoStatistics> ioStats_;
std::shared_ptr<HiveColumnHandle> rowIndexColumn_;

private:
std::unique_ptr<HivePartitionFunction> setupBucketConversion();
vector_size_t applyBucketConversion(
const RowVectorPtr& rowVector,
BufferPtr& indices);

void setupRowIdColumn();

// Evaluates remainingFilter_ on the specified vector. Returns number of rows
// passed. Populates filterEvalCtx_.selectedIndices and selectedBits if only
// some rows passed the filter. If none or all rows passed
@@ -157,6 +159,7 @@ class HiveDataSource : public DataSource {
// Column handles for the Split info columns keyed on their column names.
std::unordered_map<std::string, std::shared_ptr<HiveColumnHandle>>
infoColumns_;
SpecialColumnNames specialColumns_{};
folly::F14FastMap<std::string, std::vector<const common::Subfield*>>
subfields_;
SubfieldFilters filters_;
26 changes: 5 additions & 21 deletions velox/connectors/hive/SplitReader.cpp
@@ -145,9 +145,8 @@ void SplitReader::configureReaderOptions(

void SplitReader::prepareSplit(
std::shared_ptr<common::MetadataFilter> metadataFilter,
dwio::common::RuntimeStatistics& runtimeStats,
const std::shared_ptr<HiveColumnHandle>& rowIndexColumn) {
createReader(std::move(metadataFilter), rowIndexColumn);
dwio::common::RuntimeStatistics& runtimeStats) {
createReader(std::move(metadataFilter));

if (checkIfSplitIsEmpty(runtimeStats)) {
VELOX_CHECK(emptySplit_);
@@ -222,8 +221,7 @@ std::string SplitReader::toString() const {
}

void SplitReader::createReader(
std::shared_ptr<common::MetadataFilter> metadataFilter,
const std::shared_ptr<HiveColumnHandle>& rowIndexColumn) {
std::shared_ptr<common::MetadataFilter> metadataFilter) {
VELOX_CHECK_NE(
baseReaderOpts_.fileFormat(), dwio::common::FileFormat::UNKNOWN);

@@ -264,10 +262,6 @@ void SplitReader::createReader(
auto& fileType = baseReader_->rowType();
auto columnTypes = adaptColumns(fileType, baseReaderOpts_.fileSchema());
auto columnNames = fileType->names();
if (rowIndexColumn != nullptr) {
bool isExplicit = scanSpec_->childByName(rowIndexColumn->name()) != nullptr;
setRowIndexColumn(rowIndexColumn, isExplicit);
}
configureRowReaderOptions(
hiveTableHandle_->tableParameters(),
scanSpec_,
@@ -312,17 +306,6 @@ void SplitReader::createRowReader() {
baseRowReader_ = baseReader_->createRowReader(baseRowReaderOpts_);
}

void SplitReader::setRowIndexColumn(
const std::shared_ptr<HiveColumnHandle>& rowIndexColumn,
bool isExplicit) {
dwio::common::RowNumberColumnInfo rowNumberColumnInfo;
rowNumberColumnInfo.insertPosition =
readerOutputType_->getChildIdx(rowIndexColumn->name());
rowNumberColumnInfo.name = rowIndexColumn->name();
rowNumberColumnInfo.isExplicit = isExplicit;
baseRowReaderOpts_.setRowNumberColumnInfo(std::move(rowNumberColumnInfo));
}

std::vector<TypePtr> SplitReader::adaptColumns(
const RowTypePtr& fileType,
const std::shared_ptr<const velox::RowType>& tableSchema) {
@@ -350,7 +333,8 @@ std::vector<TypePtr> SplitReader::adaptColumns(
connectorQueryCtx_->memoryPool(),
connectorQueryCtx_->sessionTimezone());
childSpec->setConstantValue(constant);
} else if (!childSpec->isExplicitRowNumber()) {
} else if (
childSpec->columnType() == common::ScanSpec::ColumnType::kRegular) {
auto fileTypeIdx = fileType->getChildIdxIfExists(fieldName);
if (!fileTypeIdx.has_value()) {
// Column is missing. Most likely due to schema evolution.
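The adaptColumns change above replaces the old isExplicitRowNumber flag with a check on the spec's column type; the sketch below spells out that dispatch using only the enumerators that appear in the diff (it illustrates the intent, not an exact Velox API).

```cpp
#include "velox/dwio/common/ScanSpec.h"

using facebook::velox::common::ScanSpec;

// Sketch only: how a reader can treat each child spec after this change.
void describeChild(const ScanSpec& childSpec) {
  switch (childSpec.columnType()) {
    case ScanSpec::ColumnType::kRegular:
      // Mapped to a file column in adaptColumns() (or filled with nulls if
      // missing due to schema evolution).
      break;
    case ScanSpec::ColumnType::kRowIndex:
      // Generated by the column reader: the zero-based row number within the
      // current file.
      break;
    case ScanSpec::ColumnType::kComposite:
      // Assembled from its children, e.g. the Row ID column: one generated
      // kRowIndex child plus constant children set from the split.
      break;
    default:
      // Other column types (if any) are not relevant to this sketch.
      break;
  }
}
```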
11 changes: 2 additions & 9 deletions velox/connectors/hive/SplitReader.h
@@ -77,8 +77,7 @@ class SplitReader {
/// would be called only once per incoming split
virtual void prepareSplit(
std::shared_ptr<common::MetadataFilter> metadataFilter,
dwio::common::RuntimeStatistics& runtimeStats,
const std::shared_ptr<HiveColumnHandle>& rowIndexColumn);
dwio::common::RuntimeStatistics& runtimeStats);

virtual uint64_t next(uint64_t size, VectorPtr& output);

@@ -114,9 +113,7 @@ class SplitReader {

/// Create the dwio::common::Reader object baseReader_, which will be used to
/// read the data file's metadata and schema
void createReader(
std::shared_ptr<common::MetadataFilter> metadataFilter,
const std::shared_ptr<HiveColumnHandle>& rowIndexColumn);
void createReader(std::shared_ptr<common::MetadataFilter> metadataFilter);

/// Check if the hiveSplit_ is empty. The split is considered empty when
/// 1) The data file is missing but the user chooses to ignore it
@@ -136,10 +133,6 @@ class SplitReader {
const RowTypePtr& fileType,
const std::shared_ptr<const velox::RowType>& tableSchema);

void setRowIndexColumn(
const std::shared_ptr<HiveColumnHandle>& rowIndexColumn,
bool isExplicit);

void setPartitionValue(
common::ScanSpec* spec,
const std::string& partitionKey,
3 changes: 2 additions & 1 deletion velox/connectors/hive/TableHandle.h
@@ -34,7 +34,8 @@ class HiveColumnHandle : public ColumnHandle {
kSynthesized,
/// A zero-based row number of type BIGINT auto-generated by the connector.
/// Row numbers are unique within a single file only.
kRowIndex
kRowIndex,
kRowId,
};

/// NOTE: 'dataType' is the column type in target write table. 'hiveType' is
5 changes: 2 additions & 3 deletions velox/connectors/hive/iceberg/IcebergSplitReader.cpp
@@ -54,9 +54,8 @@ IcebergSplitReader::IcebergSplitReader(

void IcebergSplitReader::prepareSplit(
std::shared_ptr<common::MetadataFilter> metadataFilter,
dwio::common::RuntimeStatistics& runtimeStats,
const std::shared_ptr<HiveColumnHandle>& rowIndexColumn) {
createReader(std::move(metadataFilter), rowIndexColumn);
dwio::common::RuntimeStatistics& runtimeStats) {
createReader(std::move(metadataFilter));

if (checkIfSplitIsEmpty(runtimeStats)) {
VELOX_CHECK(emptySplit_);
(Diffs for the remaining 17 changed files are not shown.)
