diff --git a/cpp/velox/memory/VeloxColumnarBatch.cc b/cpp/velox/memory/VeloxColumnarBatch.cc
index da004340cd87..83428707b320 100644
--- a/cpp/velox/memory/VeloxColumnarBatch.cc
+++ b/cpp/velox/memory/VeloxColumnarBatch.cc
@@ -57,7 +57,7 @@ void VeloxColumnarBatch::ensureFlattened() {
     }
     // In case of output from Limit, RowVector size can be smaller than its children size.
     if (child->size() > rowVector_->size()) {
-      child->resize(rowVector_->size());
+      child = child->slice(0, rowVector_->size());
     }
   }
   flattened_ = true;
diff --git a/cpp/velox/tests/CMakeLists.txt b/cpp/velox/tests/CMakeLists.txt
index dff210b7f99f..58482fe1564e 100644
--- a/cpp/velox/tests/CMakeLists.txt
+++ b/cpp/velox/tests/CMakeLists.txt
@@ -17,13 +17,13 @@ function(add_velox_test TEST_EXEC)
   set(options)
   set(one_value_args)
   set(multi_value_args
-      SOURCES
-      )
+    SOURCES
+    )
   cmake_parse_arguments(ARG
-      "${options}"
-      "${one_value_args}"
-      "${multi_value_args}"
-      ${ARGN})
+    "${options}"
+    "${one_value_args}"
+    "${multi_value_args}"
+    ${ARGN})
 
   if(ARG_SOURCES)
     set(SOURCES ${ARG_SOURCES})
@@ -39,19 +39,25 @@ endfunction()
 add_velox_test(velox_shuffle_writer_test SOURCES VeloxShuffleWriterTest.cc)
 # TODO: ORC is not well supported.
 # add_velox_test(orc_test SOURCES OrcTest.cc)
-add_velox_test(velox_operators_test SOURCES VeloxColumnarToRowTest.cc VeloxRowToColumnarTest.cc VeloxColumnarBatchSerializerTest.cc)
 add_velox_test(
-    velox_plan_conversion_test
-    SOURCES
-    Substrait2VeloxPlanConversionTest.cc
-    Substrait2VeloxPlanValidatorTest.cc
-    Substrait2VeloxValuesNodeConversionTest.cc
-    SubstraitExtensionCollectorTest.cc
-    VeloxSubstraitRoundTripTest.cc
-    VeloxSubstraitSignatureTest.cc
-    VeloxToSubstraitTypeTest.cc
-    FunctionTest.cc
-    JsonToProtoConverter.cc
-    FilePathGenerator.cc)
+  velox_operators_test
+  SOURCES
+  VeloxColumnarToRowTest.cc
+  VeloxRowToColumnarTest.cc
+  VeloxColumnarBatchSerializerTest.cc
+  VeloxColumnarBatchTest.cc)
+add_velox_test(
+  velox_plan_conversion_test
+  SOURCES
+  Substrait2VeloxPlanConversionTest.cc
+  Substrait2VeloxPlanValidatorTest.cc
+  Substrait2VeloxValuesNodeConversionTest.cc
+  SubstraitExtensionCollectorTest.cc
+  VeloxSubstraitRoundTripTest.cc
+  VeloxSubstraitSignatureTest.cc
+  VeloxToSubstraitTypeTest.cc
+  FunctionTest.cc
+  JsonToProtoConverter.cc
+  FilePathGenerator.cc)
 add_velox_test(spark_functions_test SOURCES SparkFunctionTest.cc)
 add_velox_test(execution_ctx_test SOURCES RuntimeTest.cc)
diff --git a/cpp/velox/tests/VeloxColumnarBatchTest.cc b/cpp/velox/tests/VeloxColumnarBatchTest.cc
new file mode 100644
index 000000000000..559f9f047258
--- /dev/null
+++ b/cpp/velox/tests/VeloxColumnarBatchTest.cc
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "memory/VeloxColumnarBatch.h"
+#include "velox/vector/arrow/Bridge.h"
+#include "velox/vector/tests/utils/VectorTestBase.h"
+
+using namespace facebook::velox;
+
+namespace gluten {
+// Fixture for VeloxColumnarBatch tests; vector-building helpers come from test::VectorTestBase.
+class VeloxColumnarBatchTest : public ::testing::Test, public test::VectorTestBase {
+ protected:
+  // Velox requires the memory manager to be instantiated.
+  static void SetUpTestCase() {
+    memory::MemoryManager::testingSetInstance({});
+  }
+
+  std::shared_ptr<memory::MemoryPool> veloxPool_ = defaultLeafVeloxMemoryPool();
+};
+
+// getFlattenedRowVector() must not throw when the row vector is shorter than the vectors it wraps.
+TEST_F(VeloxColumnarBatchTest, flattenTruncatedVector) {
+  vector_size_t inputSize = 100;
+  vector_size_t childSize = 1'000;
+  auto mapVector = makeMapVector<int32_t, int64_t>(
+      childSize, [](auto row) { return 1; }, [](auto row) { return row; }, [](auto row) { return row; });
+  auto mapKeys = mapVector->mapKeys();
+  auto mapValues = mapVector->mapValues();
+
+  // First, make a row vector with the mapKeys and mapValues as children.
+  // Make the row vector size less than the children size.
+  // NOTE(review): the ROW type declares three fields but only two children are supplied;
+  // confirm the trailing MAP field is intentional.
+  auto input = std::make_shared<RowVector>(
+      veloxPool_.get(),
+      ROW({INTEGER(), BIGINT(), MAP(INTEGER(), BIGINT())}),
+      nullptr,
+      inputSize,
+      std::vector<VectorPtr>{mapKeys, mapValues});
+
+  auto batch = std::make_shared<VeloxColumnarBatch>(input);
+  ASSERT_NO_THROW(batch->getFlattenedRowVector());
+
+  // Allocate dummy indices and wrap the original mapVector with them as a dictionary, to force it to be decoded in
+  // flattenVector.
+  auto indices = allocateIndices(childSize, veloxPool_.get());
+  auto* rawIndices = indices->asMutable<vector_size_t>();
+  for (vector_size_t i = 0; i < childSize; i++) {
+    rawIndices[i] = i;
+  }
+  auto encodedMapVector = BaseVector::wrapInDictionary(nullptr, indices, inputSize, mapVector);
+  auto inputOfMap = makeRowVector({encodedMapVector});
+  auto batchOfMap = std::make_shared<VeloxColumnarBatch>(inputOfMap);
+  ASSERT_NO_THROW(batchOfMap->getFlattenedRowVector());
+}
+} // namespace gluten