Skip to content

Commit

Permalink
add opaque type support to vector fuzzer (#11189)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #11189

In preparation for supporting Opaque types in Presto page serialization, I'd like to add fuzz-testing support for them.

The idea is that each custom type will have to provide its own randomizing function, since it's otherwise impossible for the framework to do so.

Reviewed By: kagamiori

Differential Revision: D63998462

fbshipit-source-id: c3f18ae6d660cf105f7b13a9fce842f8ceeec0c8
  • Loading branch information
Guilherme Kunigami authored and facebook-github-bot committed Oct 29, 2024
1 parent ca8dba4 commit 48f6b8d
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 2 deletions.
33 changes: 31 additions & 2 deletions velox/vector/fuzzer/VectorFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -415,9 +415,12 @@ VectorPtr VectorFuzzer::fuzz(const TypePtr& type, vector_size_t size) {
// 20% chance of adding a constant vector.
if (coinToss(0.2)) {
vector = fuzzConstant(type, vectorSize);
} else if (type->isPrimitiveType()) {
vector = fuzzFlatPrimitive(type, vectorSize);
} else if (type->isOpaque()) {
vector = fuzzFlatOpaque(type, vectorSize);
} else {
vector = type->isPrimitiveType() ? fuzzFlatPrimitive(type, vectorSize)
: fuzzComplex(type, vectorSize);
vector = fuzzComplex(type, vectorSize);
}

if (vectorSize > size) {
Expand Down Expand Up @@ -558,6 +561,8 @@ VectorPtr VectorFuzzer::fuzzFlat(const TypePtr& type, vector_size_t size) {
}

return fuzzRow(std::move(childrenVectors), rowType.names(), size);
} else if (type->isOpaque()) {
return fuzzFlatOpaque(type, size);
} else {
VELOX_UNREACHABLE();
}
Expand Down Expand Up @@ -629,6 +634,30 @@ VectorPtr VectorFuzzer::fuzzComplex(const TypePtr& type, vector_size_t size) {
return nullptr; // no-op.
}

// Builds a flat vector of `size` rows for an opaque `type`.
//
// The values come from the generator stored in opaqueTypeGenerators_ under the
// opaque type's type index. Each row is set to null when
// coinToss(opts_.nullRatio) succeeds; otherwise it receives a freshly
// generated value.
//
// Fails a VELOX_CHECK if `type` is not opaque, or if no generator is stored
// for its type index.
VectorPtr VectorFuzzer::fuzzFlatOpaque(
    const TypePtr& type,
    vector_size_t size) {
  VELOX_CHECK(type->isOpaque());
  using OpaqueFlatVector = typename KindToFlatVector<TypeKind::OPAQUE>::type;

  auto result = BaseVector::create(type, size, pool_);
  auto flat = result->as<OpaqueFlatVector>();

  // The generator is keyed by the type index of the class behind the opaque
  // type.
  const auto generatorIt =
      opaqueTypeGenerators_.find(type->asOpaque().typeIndex());
  VELOX_CHECK(
      generatorIt != opaqueTypeGenerators_.end(),
      "generator does not exist for type index. Did you call registerOpaqueTypeGenerator()?");
  auto& generate = generatorIt->second;

  const auto numRows = result->size();
  for (vector_size_t row = 0; row < numRows; ++row) {
    // The coin toss is evaluated first for every row, so the random stream is
    // consumed in the same order regardless of the outcome.
    if (!coinToss(opts_.nullRatio)) {
      flat->set(row, generate(rng_));
      continue;
    }
    flat->setNull(row, true);
  }
  return result;
}

// Convenience overload: forwards to fuzzDictionary(vector, size), passing the
// input vector's own size as the size argument.
VectorPtr VectorFuzzer::fuzzDictionary(const VectorPtr& vector) {
  const auto size = vector->size();
  return fuzzDictionary(vector, size);
}
Expand Down
20 changes: 20 additions & 0 deletions velox/vector/fuzzer/VectorFuzzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -335,10 +335,21 @@ class VectorFuzzer {
/// pointing to (baseVectorSize-1).
BufferPtr fuzzIndices(vector_size_t size, vector_size_t baseVectorSize);

/// Registers `generator` as the source of random values for opaque types
/// whose underlying class is `Class`. The generator is stored under
/// std::type_index(typeid(Class)); registering again for the same `Class`
/// replaces the previously stored generator.
template <typename Class>
void registerOpaqueTypeGenerator(
    std::function<std::shared_ptr<Class>(FuzzerGenerator& rng)> generator) {
  const std::type_index key(typeid(Class));
  opaqueTypeGenerators_.insert_or_assign(key, generator);
}

private:
// Generates a flat vector for primitive types.
VectorPtr fuzzFlatPrimitive(const TypePtr& type, vector_size_t size);

// Generates a flat vector for opaque types. Each row is either null (chosen
// with opts_.nullRatio) or a value produced by the generator registered for
// the type's underlying class.
// Throws if `type` is not an opaque type, or if no generator has been
// registered for its type index.
// Expects registerOpaqueTypeGenerator<Class>() to be called beforehand.
VectorPtr fuzzFlatOpaque(const TypePtr& type, vector_size_t size);

// Generates random precision in range [1, maxPrecision]
// and scale in range [0, random precision generated].
// @param maximum precision.
Expand Down Expand Up @@ -374,6 +385,15 @@ class VectorFuzzer {
// function. C++ does not guarantee the order in which arguments are
// evaluated, which can lead to inconsistent results across platforms.
FuzzerGenerator rng_;

// Since the underlying types of opaque types are not visible to Velox, we
// require callers to register a generator for each underlying type, so we're
// able to generate random data for opaque types.
// This is done via registerOpaqueTypeGenerator().
std::unordered_map<
std::type_index,
std::function<std::shared_ptr<void>(FuzzerGenerator& rng)>>
opaqueTypeGenerators_;
};

/// Generates a random type, including maps, structs, and arrays. maxDepth
Expand Down
66 changes: 66 additions & 0 deletions velox/vector/fuzzer/tests/VectorFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* limitations under the License.
*/

#include <boost/random/uniform_int_distribution.hpp>
#include <gmock/gmock.h>
#include <gtest/gtest.h>

Expand Down Expand Up @@ -173,6 +174,71 @@ TEST_F(VectorFuzzerTest, flatNotNull) {
ASSERT_FALSE(vector->mayHaveNulls());
}

// Minimal payload class used as the underlying type of an opaque vector in the
// tests below. It carries a single integer id so that generated values can be
// range-checked.
struct Foo {
  int64_t id_;

  explicit Foo(int64_t id) : id_{id} {}
};
// Empty payload class. The opaque-type tests in this file register a generator
// for it that throws, to check that only the generator matching the requested
// type is invoked.
struct Bar {};

TEST_F(VectorFuzzerTest, flatOpaque) {
  // Exercises fuzzer.fuzzFlat() for opaque types.
  VectorFuzzer::Options opts;
  opts.nullRatio = 0.5;
  VectorFuzzer fuzzer(opts, pool());

  // Foo values get an id drawn from the closed range [1, 10]; the loop at the
  // end of the test checks every non-null element against that range.
  fuzzer.registerOpaqueTypeGenerator<Foo>([](FuzzerGenerator& rng) {
    int64_t id = boost::random::uniform_int_distribution<int64_t>(1, 10)(rng);
    return std::make_shared<Foo>(id);
  });
  // A generator for an unrelated type is registered as well. It must never run
  // because only OPAQUE<Foo>() vectors are requested. The unreachable return
  // is there so the lambda's return type deduces to std::shared_ptr<Bar>.
  fuzzer.registerOpaqueTypeGenerator<Bar>([](FuzzerGenerator& rng) {
    throw std::runtime_error("Should not be called");
    return std::make_shared<Bar>();
  });

  auto opaqueType = OPAQUE<Foo>();
  VectorPtr vector = fuzzer.fuzzFlat(opaqueType);
  ASSERT_EQ(VectorEncoding::Simple::FLAT, vector->encoding());
  ASSERT_TRUE(vector->type()->kindEquals(opaqueType));
  ASSERT_EQ(opts.vectorSize, vector->size());
  ASSERT_TRUE(vector->mayHaveNulls());

  auto flatVector = vector->asFlatVector<std::shared_ptr<void>>();
  // Fail with a clear assertion instead of dereferencing a null pointer if the
  // vector is not a flat vector of shared_ptr<void>.
  ASSERT_TRUE(flatVector != nullptr);
  for (auto i = 0; i < vector->size(); ++i) {
    if (flatVector->isNullAt(i)) {
      continue;
    }
    // The stored value is a type-erased shared_ptr<void> that was created from
    // a shared_ptr<Foo>; static_pointer_cast is the correct cast to recover
    // it.
    auto element = std::static_pointer_cast<Foo>(flatVector->valueAt(i));
    ASSERT_TRUE(element != nullptr);
    ASSERT_GT(element->id_, 0);
    ASSERT_LT(element->id_, 11);
  }
}

TEST_F(VectorFuzzerTest, opaque) {
  // Covers the generic fuzzer.fuzz() entry point when asked for an opaque
  // type.
  VectorFuzzer::Options opts;
  opts.nullRatio = 0.5;
  VectorFuzzer fuzzer(opts, pool());

  // Produces Foo instances with an id in the closed range [1, 10].
  auto makeFoo = [](FuzzerGenerator& rng) {
    boost::random::uniform_int_distribution<int64_t> idDistribution(1, 10);
    int64_t id = idDistribution(rng);
    return std::make_shared<Foo>(id);
  };
  // Generator for an unrelated type; it throws if it is ever invoked. The
  // unreachable return keeps the deduced return type std::shared_ptr<Bar>.
  auto makeBar = [](FuzzerGenerator& rng) {
    throw std::runtime_error("Should not be called");
    return std::make_shared<Bar>();
  };
  fuzzer.registerOpaqueTypeGenerator<Foo>(makeFoo);
  fuzzer.registerOpaqueTypeGenerator<Bar>(makeBar);

  auto opaqueType = OPAQUE<Foo>();
  VectorPtr vector = fuzzer.fuzz(opaqueType, opts.vectorSize);

  // There's a chance of the vector being wrapped in a dictionary or made into
  // a constant, so any of these three encodings is acceptable.
  const auto encoding = vector->encoding();
  const bool hasExpectedEncoding =
      encoding == VectorEncoding::Simple::DICTIONARY ||
      encoding == VectorEncoding::Simple::FLAT ||
      encoding == VectorEncoding::Simple::CONSTANT;
  ASSERT_TRUE(hasExpectedEncoding);

  ASSERT_TRUE(vector->type()->kindEquals(opaqueType));
  ASSERT_EQ(opts.vectorSize, vector->size());
  ASSERT_TRUE(vector->mayHaveNulls());
}

TEST_F(VectorFuzzerTest, dictionary) {
VectorFuzzer::Options opts;
VectorFuzzer fuzzer(opts, pool());
Expand Down

0 comments on commit 48f6b8d

Please sign in to comment.