Skip to content

Commit

Permalink
Fix fuzzer option containerHasNulls meaning (facebookincubator#7667)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: facebookincubator#7667

containerHasNulls used to control whether top-level container values can be
null, and the container elements themselves, as the name suggests. With this
change, I removed the reference to the flag to callsites where nullRatio is
already zero (and hence this is meaningless), check that other callsites are
covered and work well with the new meaning, and implemented the new semantic
described above.
Added unit tests to ensure it works as expected.

Reviewed By: xiaoxmeng, mbasmanova

Differential Revision: D51487492

fbshipit-source-id: 2e800da9f4981c24e58997ff882143b7d72f7b7c
  • Loading branch information
pedroerp authored and facebook-github-bot committed Nov 22, 2023
1 parent 5fda4b4 commit 81e8924
Show file tree
Hide file tree
Showing 7 changed files with 154 additions and 47 deletions.
1 change: 0 additions & 1 deletion velox/row/tests/CompactRowTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,6 @@ TEST_F(CompactRowTest, fuzz) {
opts.vectorSize = 100;
opts.containerLength = 5;
opts.nullRatio = 0.1;
opts.containerHasNulls = true;
opts.dictionaryHasNulls = false;
opts.stringVariableLength = true;
opts.stringLength = 20;
Expand Down
1 change: 0 additions & 1 deletion velox/row/tests/UnsafeRowFuzzTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ class UnsafeRowFuzzTests : public ::testing::Test {
VectorFuzzer::Options opts;
opts.vectorSize = kNumBuffers;
opts.nullRatio = 0.1;
opts.containerHasNulls = false;
opts.dictionaryHasNulls = false;
opts.stringVariableLength = true;
opts.stringLength = 20;
Expand Down
3 changes: 1 addition & 2 deletions velox/serializers/tests/CompactRowSerializerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ TEST_F(CompactRowSerializerTest, fuzz) {
VectorFuzzer::Options opts;
opts.vectorSize = 5;
opts.nullRatio = 0.1;
opts.containerHasNulls = false;
opts.dictionaryHasNulls = false;
opts.stringVariableLength = true;
opts.stringLength = 20;
Expand All @@ -129,7 +128,7 @@ TEST_F(CompactRowSerializerTest, fuzz) {
SCOPED_TRACE(fmt::format("seed: {}", seed));
VectorFuzzer fuzzer(opts, pool_.get(), seed);

auto data = fuzzer.fuzzRow(rowType);
auto data = fuzzer.fuzzInputRow(rowType);
testRoundTrip(data);
}

Expand Down
3 changes: 1 addition & 2 deletions velox/serializers/tests/UnsafeRowSerializerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,6 @@ TEST_F(UnsafeRowSerializerTest, types) {
VectorFuzzer::Options opts;
opts.vectorSize = 5;
opts.nullRatio = 0.1;
opts.containerHasNulls = false;
opts.dictionaryHasNulls = false;
opts.stringVariableLength = true;
opts.stringLength = 20;
Expand All @@ -319,7 +318,7 @@ TEST_F(UnsafeRowSerializerTest, types) {
SCOPED_TRACE(fmt::format("seed: {}", seed));
VectorFuzzer fuzzer(opts, pool_.get(), seed);

auto data = fuzzer.fuzzRow(rowType);
auto data = fuzzer.fuzzInputRow(rowType);
testRoundTrip(data);
}

Expand Down
88 changes: 51 additions & 37 deletions velox/vector/fuzzer/VectorFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -463,24 +463,28 @@ VectorPtr VectorFuzzer::fuzzFlat(const TypePtr& type, vector_size_t size) {
}
// Arrays.
else if (type->isArray()) {
return fuzzArray(
fuzzFlat(
type->asArray().elementType(),
getElementsVectorLength(opts_, size)),
size);
const auto& elementType = type->asArray().elementType();
auto elementsLength = getElementsVectorLength(opts_, size);

auto elements = opts_.containerHasNulls
? fuzzFlat(elementType, elementsLength)
: fuzzFlatNotNull(elementType, elementsLength);
return fuzzArray(elements, size);
}
// Maps.
else if (type->isMap()) {
// Do not initialize keys and values inline in the fuzzMap call as C++ does
// not specify the order they'll be called in, leading to inconsistent
// results across platforms.
auto keys = opts_.normalizeMapKeys
? fuzzFlatNotNull(
type->asMap().keyType(), getElementsVectorLength(opts_, size))
: fuzzFlat(
type->asMap().keyType(), getElementsVectorLength(opts_, size));
auto values = fuzzFlat(
type->asMap().valueType(), getElementsVectorLength(opts_, size));
const auto& keyType = type->asMap().keyType();
const auto& valueType = type->asMap().valueType();
auto length = getElementsVectorLength(opts_, size);

auto keys = opts_.normalizeMapKeys || !opts_.containerHasNulls
? fuzzFlatNotNull(keyType, length)
: fuzzFlat(keyType, length);
auto values = opts_.containerHasNulls ? fuzzFlat(valueType, length)
: fuzzFlatNotNull(valueType, length);
return fuzzMap(keys, values, size);
}
// Rows.
Expand All @@ -490,7 +494,9 @@ VectorPtr VectorFuzzer::fuzzFlat(const TypePtr& type, vector_size_t size) {
childrenVectors.reserve(rowType.children().size());

for (const auto& childType : rowType.children()) {
childrenVectors.emplace_back(fuzzFlat(childType, size));
childrenVectors.emplace_back(
opts_.containerHasNulls ? fuzzFlat(childType, size)
: fuzzFlatNotNull(childType, size));
}

return fuzzRow(std::move(childrenVectors), rowType.names(), size);
Expand Down Expand Up @@ -533,23 +539,29 @@ VectorPtr VectorFuzzer::fuzzComplex(const TypePtr& type, vector_size_t size) {
case TypeKind::ROW:
return fuzzRow(std::dynamic_pointer_cast<const RowType>(type), size);

case TypeKind::ARRAY:
return fuzzArray(
fuzz(
type->asArray().elementType(),
getElementsVectorLength(opts_, size)),
size);
case TypeKind::ARRAY: {
const auto& elementType = type->asArray().elementType();
auto elementsLength = getElementsVectorLength(opts_, size);

auto elements = opts_.containerHasNulls
? fuzz(elementType, elementsLength)
: fuzzNotNull(elementType, elementsLength);
return fuzzArray(elements, size);
}

case TypeKind::MAP: {
// Do not initialize keys and values inline in the fuzzMap call as C++
// does not specify the order they'll be called in, leading to
// inconsistent results across platforms.
auto keys = opts_.normalizeMapKeys
? fuzzNotNull(
type->asMap().keyType(), getElementsVectorLength(opts_, size))
: fuzz(type->asMap().keyType(), getElementsVectorLength(opts_, size));
auto values =
fuzz(type->asMap().valueType(), getElementsVectorLength(opts_, size));
const auto& keyType = type->asMap().keyType();
const auto& valueType = type->asMap().valueType();
auto length = getElementsVectorLength(opts_, size);

auto keys = opts_.normalizeMapKeys || !opts_.containerHasNulls
? fuzzNotNull(keyType, length)
: fuzz(keyType, length);
auto values = opts_.containerHasNulls ? fuzz(valueType, length)
: fuzzNotNull(valueType, length);
return fuzzMap(keys, values, size);
}

Expand Down Expand Up @@ -619,7 +631,7 @@ ArrayVectorPtr VectorFuzzer::fuzzArray(
return std::make_shared<ArrayVector>(
pool_,
ARRAY(elements->type()),
opts_.containerHasNulls ? fuzzNulls(size) : nullptr,
fuzzNulls(size),
size,
offsets,
sizes,
Expand Down Expand Up @@ -676,7 +688,7 @@ MapVectorPtr VectorFuzzer::fuzzMap(
return std::make_shared<MapVector>(
pool_,
MAP(keys->type(), values->type()),
opts_.containerHasNulls ? fuzzNulls(size) : nullptr,
fuzzNulls(size),
size,
offsets,
sizes,
Expand Down Expand Up @@ -715,7 +727,7 @@ RowVectorPtr VectorFuzzer::fuzzRow(
return std::make_shared<RowVector>(
pool_,
ROW(std::move(childrenNames), std::move(types)),
opts_.containerHasNulls ? fuzzNulls(size) : nullptr,
fuzzNulls(size),
size,
std::move(children));
}
Expand All @@ -731,11 +743,7 @@ RowVectorPtr VectorFuzzer::fuzzRow(
}

return std::make_shared<RowVector>(
pool_,
ROW(std::move(types)),
opts_.containerHasNulls ? fuzzNulls(size) : nullptr,
size,
std::move(children));
pool_, ROW(std::move(types)), fuzzNulls(size), size, std::move(children));
}

RowVectorPtr VectorFuzzer::fuzzRow(const RowTypePtr& rowType) {
Expand All @@ -749,14 +757,20 @@ RowVectorPtr VectorFuzzer::fuzzRow(
vector_size_t size,
bool allowTopLevelNulls) {
std::vector<VectorPtr> children;
children.reserve(rowType->size());

for (auto i = 0; i < rowType->size(); ++i) {
children.push_back(fuzz(rowType->childAt(i), size));
children.push_back(
opts_.containerHasNulls ? fuzz(rowType->childAt(i), size)
: fuzzNotNull(rowType->childAt(i), size));
}

auto nulls =
allowTopLevelNulls && opts_.containerHasNulls ? fuzzNulls(size) : nullptr;
return std::make_shared<RowVector>(
pool_, rowType, nulls, size, std::move(children));
pool_,
rowType,
allowTopLevelNulls ? fuzzNulls(size) : nullptr,
size,
std::move(children));
}

BufferPtr VectorFuzzer::fuzzNulls(vector_size_t size) {
Expand Down
11 changes: 7 additions & 4 deletions velox/vector/fuzzer/VectorFuzzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,14 @@ class VectorFuzzer {
/// double between 0 and 1).
double nullRatio{0};

/// If true, fuzzer will generate top-level nulls for containers
/// (arrays/maps/rows), i.e, nulls for the containers themselves, not the
/// elements.
/// If false, fuzzer will not generate nulls for elements within containers
/// (arrays/maps/rows). It might still generate null for the container
/// itself.
///
/// The amount of nulls are controlled by `nullRatio`.
/// The amount of nulls (if true) is controlled by `nullRatio`.
///
/// If you want to prevent the top-level row containers from being null, see
/// `fuzzInputRow()` instead.
bool containerHasNulls{true};

/// If true, fuzzer will generate top-level nulls for dictionaries.
Expand Down
94 changes: 94 additions & 0 deletions velox/vector/fuzzer/tests/VectorFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,100 @@ TEST_F(VectorFuzzerTest, row) {
vector->type()->asRow().names(), ::testing::ElementsAre("c0", "c1"));
}

TEST_F(VectorFuzzerTest, containerHasNulls) {
auto countNulls = [](const VectorPtr& vec) {
if (!vec->nulls()) {
return 0;
}
return BaseVector::countNulls(vec->nulls(), vec->size());
};

VectorFuzzer::Options opts;
opts.vectorSize = 1000;
opts.nullRatio = 0.5;
opts.normalizeMapKeys = false;
opts.containerHasNulls = true;

{
VectorFuzzer fuzzer(opts, pool());

auto arrayVector = fuzzer.fuzz(ARRAY(BIGINT()));
auto mapVector = fuzzer.fuzz(MAP(BIGINT(), BIGINT()));
auto rowVector = fuzzer.fuzz(ROW({BIGINT(), BIGINT()}));

// Check that both top level and elements have nulls.
EXPECT_GT(countNulls(arrayVector), 0);
EXPECT_GT(countNulls(mapVector), 0);
EXPECT_GT(countNulls(rowVector), 0);

auto arrayElements = arrayVector->as<ArrayVector>()->elements();
auto mapKeys = mapVector->as<MapVector>()->mapKeys();
auto mapValues = mapVector->as<MapVector>()->mapValues();
auto rowCol0 = rowVector->as<RowVector>()->childAt(0);
auto rowCol1 = rowVector->as<RowVector>()->childAt(1);

EXPECT_GT(countNulls(arrayElements), 0);
EXPECT_GT(countNulls(mapKeys), 0);
EXPECT_GT(countNulls(mapValues), 0);
EXPECT_GT(countNulls(rowCol0), 0);
EXPECT_GT(countNulls(rowCol1), 0);
}

// Test with containerHasNulls false.
{
opts.containerHasNulls = false;
VectorFuzzer fuzzer(opts, pool());

auto arrayVector = fuzzer.fuzz(ARRAY(BIGINT()));
auto mapVector = fuzzer.fuzz(MAP(BIGINT(), BIGINT()));
auto rowVector = fuzzer.fuzz(ROW({BIGINT(), BIGINT()}));

// Check that both top level and elements have nulls.
EXPECT_GT(countNulls(arrayVector), 0);
EXPECT_GT(countNulls(mapVector), 0);
EXPECT_GT(countNulls(rowVector), 0);

auto arrayElements = arrayVector->as<ArrayVector>()->elements();
auto mapKeys = mapVector->as<MapVector>()->mapKeys();
auto mapValues = mapVector->as<MapVector>()->mapValues();
auto rowCol0 = rowVector->as<RowVector>()->childAt(0);
auto rowCol1 = rowVector->as<RowVector>()->childAt(1);

EXPECT_EQ(countNulls(arrayElements), 0);
EXPECT_EQ(countNulls(mapKeys), 0);
EXPECT_EQ(countNulls(mapValues), 0);
EXPECT_EQ(countNulls(rowCol0), 0);
EXPECT_EQ(countNulls(rowCol1), 0);
}

// Test with containerHasNulls false. Flat vector version.
{
opts.containerHasNulls = false;
VectorFuzzer fuzzer(opts, pool());

auto arrayVector = fuzzer.fuzzFlat(ARRAY(BIGINT()));
auto mapVector = fuzzer.fuzzFlat(MAP(BIGINT(), BIGINT()));
auto rowVector = fuzzer.fuzzFlat(ROW({BIGINT(), BIGINT()}));

// Check that both top level and elements have nulls.
EXPECT_GT(countNulls(arrayVector), 0);
EXPECT_GT(countNulls(mapVector), 0);
EXPECT_GT(countNulls(rowVector), 0);

auto arrayElements = arrayVector->as<ArrayVector>()->elements();
auto mapKeys = mapVector->as<MapVector>()->mapKeys();
auto mapValues = mapVector->as<MapVector>()->mapValues();
auto rowCol0 = rowVector->as<RowVector>()->childAt(0);
auto rowCol1 = rowVector->as<RowVector>()->childAt(1);

EXPECT_EQ(countNulls(arrayElements), 0);
EXPECT_EQ(countNulls(mapKeys), 0);
EXPECT_EQ(countNulls(mapValues), 0);
EXPECT_EQ(countNulls(rowCol0), 0);
EXPECT_EQ(countNulls(rowCol1), 0);
}
}

FlatVectorPtr<Timestamp> genTimestampVector(
VectorFuzzer::Options::TimestampPrecision precision,
size_t vectorSize,
Expand Down

0 comments on commit 81e8924

Please sign in to comment.