-
Notifications
You must be signed in to change notification settings - Fork 445
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[VL] Use slice instead of resize in ensureFlattened (#5523)
The children of input RowVector, can reference same BaseVector before and after flattening. e.g. select explode(a), a from ..., which a is of map type will output 3 columns: col_0: a.mapKeys col_1: a.mapValues and col_2: a. In this case, col_0/co_1 is referencing the same BaseVector as the mapKeys/mapValues in col_2. Resizing col_0/col_1 will cause expected failure in accessing col_2.
- Loading branch information
Showing
3 changed files
with
93 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include "memory/VeloxColumnarBatch.h" | ||
#include "velox/vector/arrow/Bridge.h" | ||
#include "velox/vector/tests/utils/VectorTestBase.h" | ||
|
||
using namespace facebook::velox; | ||
|
||
namespace gluten { | ||
class VeloxColumnarBatchTest : public ::testing::Test, public test::VectorTestBase { | ||
protected: | ||
// Velox requires the mem manager to be instanced. | ||
static void SetUpTestCase() { | ||
memory::MemoryManager::testingSetInstance({}); | ||
} | ||
|
||
std::shared_ptr<memory::MemoryPool> veloxPool_ = defaultLeafVeloxMemoryPool(); | ||
}; | ||
|
||
TEST_F(VeloxColumnarBatchTest, flattenTruncatedVector) { | ||
vector_size_t inputSize = 1'00; | ||
vector_size_t childSize = 1'000; | ||
auto mapVector = makeMapVector<int32_t, int64_t>( | ||
childSize, [](auto row) { return 1; }, [](auto row) { return row; }, [](auto row) { return row; }); | ||
auto mapKeys = mapVector->mapKeys(); | ||
auto mapValues = mapVector->mapValues(); | ||
|
||
// First, make a row vector with the mapKeys and mapValues as children. | ||
// Make the row vector size less than the children size. | ||
auto input = std::make_shared<RowVector>( | ||
veloxPool_.get(), | ||
ROW({INTEGER(), BIGINT(), MAP(INTEGER(), BIGINT())}), | ||
nullptr, | ||
inputSize, | ||
std::vector<VectorPtr>{mapKeys, mapValues}); | ||
|
||
auto batch = std::make_shared<VeloxColumnarBatch>(input); | ||
ASSERT_NO_THROW(batch->getFlattenedRowVector()); | ||
|
||
// Allocate a dummy indices and wrap the original mapVector with it as a dictionary, to force it get decoded in | ||
// flattenVector. | ||
auto indices = allocateIndices(childSize, veloxPool_.get()); | ||
auto* rawIndices = indices->asMutable<vector_size_t>(); | ||
for (vector_size_t i = 0; i < childSize; i++) { | ||
rawIndices[i] = i; | ||
} | ||
auto encodedMapVector = BaseVector::wrapInDictionary(nullptr, indices, inputSize, mapVector); | ||
auto inputOfMap = makeRowVector({encodedMapVector}); | ||
auto batchOfMap = std::make_shared<VeloxColumnarBatch>(inputOfMap); | ||
ASSERT_NO_THROW(batchOfMap->getFlattenedRowVector()); | ||
} | ||
} // namespace gluten |