Skip to content

Commit

Permalink
[VL] Use slice instead of resize in ensureFlattened (#5523)
Browse files Browse the repository at this point in the history
The children of input RowVector, can reference same BaseVector before and after flattening. e.g. select explode(a), a from ..., which a is of map type will output 3 columns: col_0: a.mapKeys col_1: a.mapValues and col_2: a. In this case, col_0/co_1 is referencing the same BaseVector as the mapKeys/mapValues in col_2. Resizing col_0/col_1 will cause expected failure in accessing col_2.
  • Loading branch information
marin-ma authored Apr 25, 2024
1 parent 4697297 commit 8ade7a9
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 20 deletions.
2 changes: 1 addition & 1 deletion cpp/velox/memory/VeloxColumnarBatch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ void VeloxColumnarBatch::ensureFlattened() {
}
// In case of output from Limit, RowVector size can be smaller than its children size.
if (child->size() > rowVector_->size()) {
child->resize(rowVector_->size());
child = child->slice(0, rowVector_->size());
}
}
flattened_ = true;
Expand Down
44 changes: 25 additions & 19 deletions cpp/velox/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ function(add_velox_test TEST_EXEC)
set(options)
set(one_value_args)
set(multi_value_args
SOURCES
)
SOURCES
)
cmake_parse_arguments(ARG
"${options}"
"${one_value_args}"
"${multi_value_args}"
${ARGN})
"${options}"
"${one_value_args}"
"${multi_value_args}"
${ARGN})

if(ARG_SOURCES)
set(SOURCES ${ARG_SOURCES})
Expand All @@ -39,19 +39,25 @@ endfunction()
add_velox_test(velox_shuffle_writer_test SOURCES VeloxShuffleWriterTest.cc)
# TODO: ORC is not well supported.
# add_velox_test(orc_test SOURCES OrcTest.cc)
add_velox_test(velox_operators_test SOURCES VeloxColumnarToRowTest.cc VeloxRowToColumnarTest.cc VeloxColumnarBatchSerializerTest.cc)
add_velox_test(
velox_plan_conversion_test
SOURCES
Substrait2VeloxPlanConversionTest.cc
Substrait2VeloxPlanValidatorTest.cc
Substrait2VeloxValuesNodeConversionTest.cc
SubstraitExtensionCollectorTest.cc
VeloxSubstraitRoundTripTest.cc
VeloxSubstraitSignatureTest.cc
VeloxToSubstraitTypeTest.cc
FunctionTest.cc
JsonToProtoConverter.cc
FilePathGenerator.cc)
velox_operators_test
SOURCES
VeloxColumnarToRowTest.cc
VeloxRowToColumnarTest.cc
VeloxColumnarBatchSerializerTest.cc
VeloxColumnarBatchTest.cc)
add_velox_test(
velox_plan_conversion_test
SOURCES
Substrait2VeloxPlanConversionTest.cc
Substrait2VeloxPlanValidatorTest.cc
Substrait2VeloxValuesNodeConversionTest.cc
SubstraitExtensionCollectorTest.cc
VeloxSubstraitRoundTripTest.cc
VeloxSubstraitSignatureTest.cc
VeloxToSubstraitTypeTest.cc
FunctionTest.cc
JsonToProtoConverter.cc
FilePathGenerator.cc)
add_velox_test(spark_functions_test SOURCES SparkFunctionTest.cc)
add_velox_test(execution_ctx_test SOURCES RuntimeTest.cc)
67 changes: 67 additions & 0 deletions cpp/velox/tests/VeloxColumnarBatchTest.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "memory/VeloxColumnarBatch.h"
#include "velox/vector/arrow/Bridge.h"
#include "velox/vector/tests/utils/VectorTestBase.h"

using namespace facebook::velox;

namespace gluten {
class VeloxColumnarBatchTest : public ::testing::Test, public test::VectorTestBase {
protected:
// Velox requires the mem manager to be instanced.
static void SetUpTestCase() {
memory::MemoryManager::testingSetInstance({});
}

std::shared_ptr<memory::MemoryPool> veloxPool_ = defaultLeafVeloxMemoryPool();
};

TEST_F(VeloxColumnarBatchTest, flattenTruncatedVector) {
vector_size_t inputSize = 1'00;
vector_size_t childSize = 1'000;
auto mapVector = makeMapVector<int32_t, int64_t>(
childSize, [](auto row) { return 1; }, [](auto row) { return row; }, [](auto row) { return row; });
auto mapKeys = mapVector->mapKeys();
auto mapValues = mapVector->mapValues();

// First, make a row vector with the mapKeys and mapValues as children.
// Make the row vector size less than the children size.
auto input = std::make_shared<RowVector>(
veloxPool_.get(),
ROW({INTEGER(), BIGINT(), MAP(INTEGER(), BIGINT())}),
nullptr,
inputSize,
std::vector<VectorPtr>{mapKeys, mapValues});

auto batch = std::make_shared<VeloxColumnarBatch>(input);
ASSERT_NO_THROW(batch->getFlattenedRowVector());

// Allocate a dummy indices and wrap the original mapVector with it as a dictionary, to force it get decoded in
// flattenVector.
auto indices = allocateIndices(childSize, veloxPool_.get());
auto* rawIndices = indices->asMutable<vector_size_t>();
for (vector_size_t i = 0; i < childSize; i++) {
rawIndices[i] = i;
}
auto encodedMapVector = BaseVector::wrapInDictionary(nullptr, indices, inputSize, mapVector);
auto inputOfMap = makeRowVector({encodedMapVector});
auto batchOfMap = std::make_shared<VeloxColumnarBatch>(inputOfMap);
ASSERT_NO_THROW(batchOfMap->getFlattenedRowVector());
}
} // namespace gluten

0 comments on commit 8ade7a9

Please sign in to comment.