From 08006fc352b2e57eccf878ab39138006c6016be6 Mon Sep 17 00:00:00 2001 From: Jiaqi Zhang Date: Thu, 12 Dec 2024 01:29:37 -0800 Subject: [PATCH] Replace columnHasNulls with row stats --- velox/exec/RowContainer.cpp | 9 ++------- velox/exec/RowContainer.h | 18 ++++++------------ 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/velox/exec/RowContainer.cpp b/velox/exec/RowContainer.cpp index d8b622c3a728..125767806751 100644 --- a/velox/exec/RowContainer.cpp +++ b/velox/exec/RowContainer.cpp @@ -188,7 +188,6 @@ RowContainer::RowContainer( if (nullableKeys_) { ++nullOffset; } - columnHasNulls_.push_back(false); } // Make offset at least sizeof pointer so that there is space for a // free list next pointer below the bit at 'freeFlagOffset_'. @@ -217,7 +216,6 @@ RowContainer::RowContainer( nullOffsets_.push_back(nullOffset); ++nullOffset; isVariableWidth |= !type->isFixedWidth(); - columnHasNulls_.push_back(false); } if (hasProbedFlag) { nullOffsets_.push_back(nullOffset); @@ -467,9 +465,6 @@ void RowContainer::freeRowsExtraMemory( } void RowContainer::invalidateColumnStats() { - if (rowColumnsStats_.empty()) { - return; - } rowColumnsStats_.clear(); } @@ -567,7 +562,8 @@ void RowContainer::store( decoded, rows, isKey, - offsets_[column]); + offsets_[column], + column); } else { const auto rowColumn = rowColumns_[column]; VELOX_DYNAMIC_TYPE_DISPATCH_ALL( @@ -815,7 +811,6 @@ void RowContainer::storeComplexType( if (decoded.isNullAt(index)) { VELOX_DCHECK(nullMask); row[nullByte] |= nullMask; - updateColumnHasNulls(column, true); return; } RowSizeTracker tracker(row[rowSizeOffset_], *stringAllocator_); diff --git a/velox/exec/RowContainer.h b/velox/exec/RowContainer.h index 3782dff69816..a39fee56a113 100644 --- a/velox/exec/RowContainer.h +++ b/velox/exec/RowContainer.h @@ -823,7 +823,8 @@ class RowContainer { /// Returns true if specified column may have nulls, false otherwise. inline bool columnHasNulls(int32_t columnIndex) const { - return columnHasNulls_[columnIndex]; + return columnStats(columnIndex).has_value() && + columnStats(columnIndex)->nullCount() > 0; } const std::vector& accumulators() const { @@ -1015,7 +1016,6 @@ class RowContainer { // Do not leave an uninitialized value in the case of a // null. This is an error with valgrind/asan. *reinterpret_cast(row + offset) = T(); - updateColumnHasNulls(columnIndex, true); return; } if constexpr (std::is_same_v) { @@ -1056,6 +1056,7 @@ class RowContainer { for (int32_t i = 0; i < rows.size(); ++i) { storeWithNulls( decoded, i, isKey, rows[i], offset, nullByte, nullMask, column); + updateColumnStats(decoded, i, rows[i], column); } } @@ -1064,9 +1065,11 @@ class RowContainer { const DecodedVector& decoded, folly::Range rows, bool isKey, - int32_t offset) { + int32_t offset, + int32_t column) { for (int32_t i = 0; i < rows.size(); ++i) { storeNoNulls(decoded, i, isKey, rows[i], offset); + updateColumnStats(decoded, i, rows[i], column); } } @@ -1467,12 +1470,6 @@ class RowContainer { // method is called whenever a row is erased. void invalidateColumnStats(); - // Updates the specific column's columnHasNulls_ flag, if 'hasNulls' is true. - // columnHasNulls_ flag is false by default. - inline void updateColumnHasNulls(int32_t columnIndex, bool hasNulls) { - columnHasNulls_[columnIndex] = columnHasNulls_[columnIndex] || hasNulls; - } - const std::vector keyTypes_; const bool nullableKeys_; const bool isJoinBuild_; @@ -1481,8 +1478,6 @@ class RowContainer { const std::unique_ptr stringAllocator_; - std::vector columnHasNulls_; - // Indicates if we can add new row to this row container. It is set to false // after user calls 'getRowPartitions()' to create 'rowPartitions' object for // parallel join build. @@ -1637,7 +1632,6 @@ inline void RowContainer::storeWithNulls( if (decoded.isNullAt(rowIndex)) { row[nullByte] |= nullMask; memset(row + offset, 0, sizeof(int128_t)); - updateColumnHasNulls(columnIndex, true); return; } HugeInt::serialize(decoded.valueAt(rowIndex), row + offset);