Skip to content

Commit

Permalink
Back out "Optimize evaluation by reducing peeled vectors size" (faceb…
Browse files Browse the repository at this point in the history
…ookincubator#10742)

Summary:
This reverses an optimization in peeling where a flat vector is
created as a peeled vector if the number of rows to peel is less
than 1/8 the size of the dictionary's alphabet, to prevent the
circulation of large peeled vectors that can cause large
intermediate allocations and affect memory and performance.

This resulted in a bug where the Common sub expression optimization,
that relies on the address of the input vector to re-use the cached
result, would get a different flat vector in its subsequent
invocation but would have the same memory address and assume it can
re-use the cached results. This would then cause wrong results.

Pull Request resolved: facebookincubator#10742

Original commit changeset: 23ef4c590eba

Original Phabricator Diff: D60064934

Reviewed By: kevinwilfong

Differential Revision: D61240010

fbshipit-source-id: d19eb30c33cdfbbd16a58a2ae315da3af1f85d20
  • Loading branch information
Bikramjeet Vig authored and facebook-github-bot committed Aug 14, 2024
1 parent 3e02902 commit d176894
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 214 deletions.
69 changes: 2 additions & 67 deletions velox/expression/PeeledEncoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,7 @@ namespace facebook::velox::exec {
SelectivityVector* PeeledEncoding::translateToInnerRows(
const SelectivityVector& outerRows,
LocalSelectivityVector& innerRowsHolder) const {
if (wrapEncoding_ == VectorEncoding::Simple::FLAT) {
return innerRowsHolder.get(outerRows);
}
VELOX_CHECK(wrapEncoding_ != VectorEncoding::Simple::FLAT);
if (wrapEncoding_ == VectorEncoding::Simple::CONSTANT) {
auto newRows = innerRowsHolder.get(constantWrapIndex_ + 1, false);
newRows->setValid(constantWrapIndex_, true);
Expand Down Expand Up @@ -111,61 +109,6 @@ void PeeledEncoding::setDictionaryWrapping(
wrapNulls_ = std::move(wrapping.nulls);
}

void PeeledEncoding::flattenPeeledVectors(
const SelectivityVector& rows,
const std::vector<bool>& constantFields,
std::vector<VectorPtr>& peeledVectors) {
VELOX_CHECK(flattenPeeled_.empty());
for (int i = 0; i < peeledVectors.size(); ++i) {
if (!constantFields.empty() && constantFields[i]) {
continue;
}
if (peeledVectors[i]->isConstantEncoding() && !wrapNulls_) {
continue;
}
switch (peeledVectors[i]->typeKind()) {
case TypeKind::ARRAY:
case TypeKind::MAP:
case TypeKind::ROW:
return;
default:
if (isLazyNotLoaded(*peeledVectors[i])) {
return;
}
}
}
auto nonNullRows = rows;
if (wrapNulls_) {
nonNullRows.deselectNulls(
wrapNulls_->as<uint64_t>(), rows.begin(), rows.end());
}
for (int i = 0; i < peeledVectors.size(); ++i) {
if (!constantFields.empty() && constantFields[i]) {
continue;
}
if (peeledVectors[i]->isConstantEncoding() && !wrapNulls_) {
if (peeledVectors[i]->size() < rows.end()) {
peeledVectors[i] =
BaseVector::wrapInConstant(rows.end(), 0, peeledVectors[i]);
}
} else {
auto flat = BaseVector::create(
peeledVectors[i]->type(), rows.end(), peeledVectors[i]->pool());
flat->copy(
peeledVectors[i].get(), nonNullRows, wrap_->as<vector_size_t>());
if (!nonNullRows.isAllSelected()) {
bits::andBits(
flat->mutableRawNulls(), nonNullRows.allBits(), 0, rows.end());
}
peeledVectors[i] = flat;
flattenPeeled_.push_back(std::move(flat));
}
}
wrapEncoding_ = VectorEncoding::Simple::FLAT;
baseSize_ = rows.end();
wrap_.reset();
}

bool PeeledEncoding::peelInternal(
const std::vector<VectorPtr>& vectorsToPeel,
const SelectivityVector& rows,
Expand Down Expand Up @@ -280,12 +223,6 @@ bool PeeledEncoding::peelInternal(
constantWrapIndex_ = innerIdx;
} else {
setDictionaryWrapping(decodedVector, rows, *firstWrapper);
if (baseSize_ / 8 > rows.end()) {
// For large base vector, peeling would still need to allocate and
// initialize/copy the memory of unselected rows, resulting in large
// performance penalty, thus it's safer to do deep copy in this case.
flattenPeeledVectors(rows, constantFields, peeledVectors);
}
// Make sure all the constant vectors have at least the same length as the
// base vector after peeling. This will make sure any translated rows
// point to valid rows in the constant vector.
Expand All @@ -311,9 +248,7 @@ VectorPtr PeeledEncoding::wrap(
velox::memory::MemoryPool* pool,
VectorPtr peeledResult,
const SelectivityVector& rows) const {
if (wrapEncoding_ == VectorEncoding::Simple::FLAT) {
return peeledResult;
}
VELOX_CHECK(wrapEncoding_ != VectorEncoding::Simple::FLAT);
VectorPtr wrappedResult;
if (wrapEncoding_ == VectorEncoding::Simple::DICTIONARY) {
if (!peeledResult) {
Expand Down
69 changes: 20 additions & 49 deletions velox/expression/PeeledEncoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,38 +163,19 @@ class PeeledEncoding {
/// Takes a set of outer rows (rows defined over the peel) and a functor that
/// is executed for each outer row which does not map to a null row.
/// The outer and its corresponding inner row is passed to the functor.
template <typename F>
void applyToNonNullInnerRows(const SelectivityVector& outerRows, F&& func)
const {
auto* wrapNulls = wrapNulls_ ? wrapNulls_->as<uint64_t>() : nullptr;
switch (wrapEncoding_) {
case VectorEncoding::Simple::FLAT:
outerRows.applyToSelected([&](auto outerRow) {
if (wrapNulls && bits::isBitNull(wrapNulls, outerRow)) {
return;
}
func(outerRow, outerRow);
});
break;
case VectorEncoding::Simple::CONSTANT:
VELOX_CHECK_NULL(wrapNulls);
outerRows.applyToSelected(
[&](auto outerRow) { func(outerRow, constantWrapIndex_); });
break;
case VectorEncoding::Simple::DICTIONARY: {
auto indices = wrap_->as<vector_size_t>();
outerRows.applyToSelected([&](auto outerRow) {
// A known null in the outer row masks an error.
if (wrapNulls && bits::isBitNull(wrapNulls, outerRow)) {
return;
}
func(outerRow, indices[outerRow]);
});
break;
void applyToNonNullInnerRows(
const SelectivityVector& outerRows,
std::function<void(vector_size_t, vector_size_t)> func) const {
auto indices = wrap_ ? wrap_->as<vector_size_t>() : nullptr;
auto wrapNulls = wrapNulls_ ? wrapNulls_->as<uint64_t>() : nullptr;
outerRows.applyToSelected([&](auto outerRow) {
// A known null in the outer row masks an error.
if (wrapNulls && bits::isBitNull(wrapNulls, outerRow)) {
return;
}
default:
VELOX_UNREACHABLE();
}
vector_size_t innerRow = indices ? indices[outerRow] : constantWrapIndex_;
func(outerRow, innerRow);
});
}

private:
Expand All @@ -214,32 +195,22 @@ class PeeledEncoding {
const SelectivityVector& rows,
BaseVector& firstWrapper);

void flattenPeeledVectors(
const SelectivityVector& rows,
const std::vector<bool>& constantFields,
std::vector<VectorPtr>& peeledVectors);

// The encoding of the peel. Set after getPeeledVectors() is called. Is equal
// to Flat if getPeeledVectors() has not been called or peeling was not
// successful.
/// The encoding of the peel. Set after getPeeledVectors() is called. Is equal
/// to Flat if getPeeledVectors() has not been called or peeling was not
/// successful.
VectorEncoding::Simple wrapEncoding_ = VectorEncoding::Simple::FLAT;

// The dictionary indices. Only valid if wrapEncoding_ = DICTIONARY.
/// The dictionary indices. Only valid if wrapEncoding_ = DICTIONARY.
BufferPtr wrap_;

// The dictionary nulls. Only valid if wrapEncoding_ = DICTIONARY.
/// The dictionary nulls. Only valid if wrapEncoding_ = DICTIONARY.
BufferPtr wrapNulls_;

// The size of one of the peeled vectors. Only valid if wrapEncoding_ =
// DICTIONARY.
/// The size of one of the peeled vectors. Only valid if wrapEncoding_ =
/// DICTIONARY.
vector_size_t baseSize_ = 0;

// The constant index. Only valid if wrapEncoding_ = CONSTANT.
/// The constant index. Only valid if wrapEncoding_ = CONSTANT.
vector_size_t constantWrapIndex_ = 0;

// Deep copies of peeled vectors with low selectivity to avoid cost on unused
// rows. Stored here to bring their lifecycles no shorter than PeeledEncoding
// object.
std::vector<VectorPtr> flattenPeeled_;
};
} // namespace facebook::velox::exec
4 changes: 2 additions & 2 deletions velox/expression/tests/ExprTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2094,7 +2094,7 @@ TEST_F(ExprTest, memo) {
// 1. correctly evaluates the unevaluated rows on subsequent runs
// 2. Only caches results if it encounters the same base twice
auto base = makeArrayVector<int64_t>(
500,
1'000,
[](auto row) { return row % 5 + 1; },
[](auto row, auto index) { return (row % 3) + index; });

Expand Down Expand Up @@ -2142,7 +2142,7 @@ TEST_F(ExprTest, memo) {

// Create a new base
base = makeArrayVector<int64_t>(
500,
1'000,
[](auto row) { return row % 5 + 1; },
[](auto row, auto index) { return (row % 3) + index; });

Expand Down
Loading

0 comments on commit d176894

Please sign in to comment.