diff --git a/velox/vector/fuzzer/CMakeLists.txt b/velox/vector/fuzzer/CMakeLists.txt index 43b8cc83746d..20c096b48434 100644 --- a/velox/vector/fuzzer/CMakeLists.txt +++ b/velox/vector/fuzzer/CMakeLists.txt @@ -12,15 +12,30 @@ # See the License for the specific language governing permissions and # limitations under the License. -add_library(velox_vector_fuzzer GeneratorSpec.cpp Utils.cpp VectorFuzzer.cpp) +add_library(velox_vector_fuzzer_util Utils.cpp) target_link_libraries( - velox_vector_fuzzer velox_type velox_vector) + velox_vector_fuzzer_util velox_vector) + +add_library(velox_vector_fuzzer GeneratorSpec.cpp VectorFuzzer.cpp) + +target_link_libraries( + velox_vector_fuzzer velox_type velox_vector velox_vector_fuzzer_util) if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") target_compile_options(velox_vector_fuzzer PRIVATE -Wno-deprecated-declarations) endif() +velox_add_library(velox_fuzzer_constrained_input_generators + ConstrainedGenerators.cpp ConstrainedVectorGenerator.cpp) + +velox_link_libraries( + velox_fuzzer_constrained_input_generators + Folly::folly + velox_expression + velox_type + velox_vector_fuzzer_util) + if(${VELOX_BUILD_TESTING}) add_subdirectory(tests) endif() diff --git a/velox/vector/fuzzer/ConstrainedGenerators.cpp b/velox/vector/fuzzer/ConstrainedGenerators.cpp new file mode 100644 index 000000000000..51583c5b9c7b --- /dev/null +++ b/velox/vector/fuzzer/ConstrainedGenerators.cpp @@ -0,0 +1,206 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/vector/fuzzer/ConstrainedGenerators.h" + +#include + +#include "velox/vector/fuzzer/Utils.h" + +namespace facebook::velox::fuzzer { + +// AbstractInputGenerator +AbstractInputGenerator::AbstractInputGenerator( + size_t seed, + const TypePtr& type, + std::unique_ptr&& next, + double nullRatio) + : type_{type}, next_{std::move(next)}, nullRatio_{nullRatio} { + rng_.seed(seed); +} + +// NotEqualConstrainedGenerator +variant NotEqualConstrainedGenerator::generate() { + variant value; + do { + value = next_->generate(); + } while (value == excludedValue_); + return value; +} + +// SetConstrainedGenerator +variant SetConstrainedGenerator::generate() { + const auto index = + boost::random::uniform_int_distribution(0, set_.size() - 1)(rng_); + return set_[index]; +} + +// JsonInputGenerator +folly::json::serialization_opts JsonInputGenerator::getSerializationOptions() { + folly::json::serialization_opts opts; + opts.allow_non_string_keys = true; + opts.allow_nan_inf = true; + if (makeRandomVariation_) { + opts.convert_int_keys = rand(rng_); + opts.pretty_formatting = rand(rng_); + opts.pretty_formatting_indent_width = rand(rng_, 0, 4); + opts.encode_non_ascii = rand(rng_); + opts.allow_trailing_comma = rand(rng_); + opts.sort_keys = rand(rng_); + opts.skip_invalid_utf8 = rand(rng_); + opts.parse_numbers_as_strings = rand(rng_); + } + return opts; +} + +variant JsonInputGenerator::generate() { + if (coinToss(rng_, nullRatio_)) { + return variant::null(type_->kind()); + } + + const auto object = objectGenerator_->generate(); + const folly::dynamic jsonObject = convertVariantToDynamic(object); + const auto jsonString = folly::json::serialize(jsonObject, opts_); + if (makeRandomVariation_ && coinToss(rng_, 0.5)) { + makeRandomVariation(jsonString); + } + return variant(jsonString); +} + +folly::dynamic JsonInputGenerator::convertVariantToDynamic( + const variant& object) { + if (object.isNull()) { + return folly::dynamic(); + } + + switch (object.kind()) { + case TypeKind::BOOLEAN: + return convertVariantToDynamicPrimitive(object); + case TypeKind::TINYINT: + return convertVariantToDynamicPrimitive(object); + case TypeKind::SMALLINT: + return convertVariantToDynamicPrimitive(object); + case TypeKind::INTEGER: + return convertVariantToDynamicPrimitive(object); + case TypeKind::BIGINT: + return convertVariantToDynamicPrimitive(object); + case TypeKind::REAL: + return convertVariantToDynamicPrimitive(object); + case TypeKind::DOUBLE: + return convertVariantToDynamicPrimitive(object); + case TypeKind::VARCHAR: + return convertVariantToDynamicPrimitive(object); + case TypeKind::VARBINARY: + return convertVariantToDynamicPrimitive(object); + case TypeKind::TIMESTAMP: + return convertVariantToDynamicPrimitive(object); + case TypeKind::HUGEINT: + return convertVariantToDynamicPrimitive(object); + case TypeKind::ARRAY: { + folly::dynamic array = folly::dynamic::array; + for (const auto& element : object.value()) { + array.push_back(convertVariantToDynamic(element)); + } + return array; + } + case TypeKind::MAP: { + folly::dynamic map = folly::dynamic::object; + for (const auto& [key, value] : object.value()) { + map[convertVariantToDynamic(key)] = convertVariantToDynamic(value); + } + return map; + } + case TypeKind::ROW: { + folly::dynamic array = folly::dynamic::array; + for (const auto& element : object.value()) { + array.push_back(convertVariantToDynamic(element)); + } + return array; + } + default: + VELOX_UNREACHABLE("Unsupported type"); + } +} + +std::vector getControlCharacters() { + static std::vector controlCharacters = { + "\x00", "\x01", "\x02", "\x03", "\x04", "\x05", "\x06", + "\x07", "\x08", "\x09", "\x0A", "\x0B", "\x0C", "\x0D", + "\x0E", "\x0F", "\x10", "\x11", "\x12", "\x13", "\x14", + "\x15", "\x16", "\x17", "\x18", "\x19", "\x1A", "\x1B", + "\x1C", "\x1D", "\x1E", "\x1F", "\x20", "\x7F", "\u0080", + "\u0081", "\u0082", "\u0083", "\u0084", "\u0085", "\u0086", "\u0087", + "\u0088", "\u0089", "\u008A", "\u008B", "\u008C", "\u008D", "\u008E", + "\u008F", "\u0090", "\u0091", "\u0092", "\u0093", "\u0094", "\u0095", + "\u0096", "\u0097", "\u0098", "\u0099", "\u009A", "\u009B", "\u009C", + "\u009D", "\u009E", "\u009F"}; + return controlCharacters; +}; + +void JsonInputGenerator::makeRandomVariation(std::string json) { + if (coinToss(rng_, 0.1)) { + const auto controlCharacters = getControlCharacters(); + const auto index = rand(rng_, 0, controlCharacters.size() - 1); + const auto& controlCharacter = controlCharacters[index]; + const auto indexToInsert = rand(rng_, 0, json.size()); + json.insert(indexToInsert, controlCharacter); + } else if (coinToss(rng_, 0.1)) { + const auto size = rand(rng_, 0, json.size()); + json.resize(size); + } +} + +// Utility functions +template +std::unique_ptr getRandomInputGeneratorPrimitive( + size_t seed, + const TypePtr& type, + double nullRatio) { + using T = typename TypeTraits::NativeType; + std::unique_ptr generator = + std::make_unique>(seed, type, nullRatio); + return generator; +} + +std::unique_ptr +getRandomInputGenerator(size_t seed, const TypePtr& type, double nullRatio) { + std::unique_ptr generator; + if (type->isPrimitiveType()) { + return VELOX_DYNAMIC_SCALAR_TEMPLATE_TYPE_DISPATCH( + getRandomInputGeneratorPrimitive, + false, + type->kind(), + seed, + type, + nullRatio); + } else if (type->isArray()) { + generator = std::make_unique>( + seed, type, nullRatio); + } else if (type->isMap()) { + generator = + std::make_unique>(seed, type, nullRatio); + + } else if (type->isRow()) { + generator = std::make_unique>( + seed, + type, + std::vector>{}, + nullRatio); + } + return generator; +} + +} // namespace facebook::velox::fuzzer diff --git a/velox/vector/fuzzer/ConstrainedGenerators.h b/velox/vector/fuzzer/ConstrainedGenerators.h new file mode 100644 index 000000000000..ec31ea70561e --- /dev/null +++ b/velox/vector/fuzzer/ConstrainedGenerators.h @@ -0,0 +1,389 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include "folly/json.h" + +#include "velox/type/Type.h" +#include "velox/type/Variant.h" +#include "velox/vector/fuzzer/Utils.h" + +namespace facebook::velox::fuzzer { + +using facebook::velox::variant; + +class AbstractInputGenerator { + public: + AbstractInputGenerator( + size_t seed, + const TypePtr& type, + std::unique_ptr&& next, + double nullRatio); + + virtual ~AbstractInputGenerator() = default; + + virtual variant generate() = 0; + + TypePtr type() const { + return type_; + } + + protected: + FuzzerGenerator rng_; + + TypePtr type_; + + std::unique_ptr next_; + + double nullRatio_; +}; + +std::unique_ptr +getRandomInputGenerator(size_t seed, const TypePtr& type, double nullRatio); + +template +class RandomInputGenerator : public AbstractInputGenerator { + public: + RandomInputGenerator(size_t seed, const TypePtr& type, double nullRatio) + : AbstractInputGenerator(seed, type, nullptr, nullRatio) {} + + ~RandomInputGenerator() override = default; + + variant generate() override { + if (coinToss(rng_, nullRatio_)) { + return variant::null(type_->kind()); + } + + if (type_->isDate()) { + return variant(randDate(rng_)); + } + return variant(rand(rng_)); + } +}; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +template +class RandomInputGenerator>> + : public AbstractInputGenerator { + public: + RandomInputGenerator>>( + size_t seed, + const TypePtr& type, + double nullRatio, + size_t maxLength = 20, + const std::vector& encodings = + {UTF8CharList::ASCII, + UTF8CharList::UNICODE_CASE_SENSITIVE, + UTF8CharList::EXTENDED_UNICODE, + UTF8CharList::MATHEMATICAL_SYMBOLS}) + : AbstractInputGenerator(seed, type, nullptr, nullRatio), + maxLength_{maxLength}, + encodings_{encodings} {} + + ~RandomInputGenerator>>() + override = default; + + variant generate() override { + if (coinToss(rng_, nullRatio_)) { + return variant::null(type_->kind()); + } + + const auto length = rand(rng_, 0, maxLength_); + std::wstring_convert, char16_t> converter; + std::string buf; + return variant(randString(rng_, length, encodings_, buf, converter)); + } + + private: + const size_t maxLength_; + + std::vector encodings_; +}; +#pragma GCC diagnostic pop + +template +class RandomInputGenerator>> + : public AbstractInputGenerator { + public: + RandomInputGenerator>>( + size_t seed, + const TypePtr& type, + double nullRatio, + size_t maxLength = 10, + std::unique_ptr&& elementGenerator = nullptr, + std::optional containAtIndex = std::nullopt, + std::unique_ptr&& containGenerator = nullptr) + : AbstractInputGenerator(seed, type, nullptr, nullRatio), + maxLength_{maxLength}, + elementGenerator_{ + elementGenerator + ? std::move(elementGenerator) + : getRandomInputGenerator(seed, type->childAt(0), nullRatio)}, + containAtIndex_{containAtIndex}, + containGenerator_{std::move(containGenerator)} {} + + ~RandomInputGenerator>>() + override = default; + + variant generate() override { + if (coinToss(rng_, nullRatio_)) { + return variant::null(TypeKind::ARRAY); + } + + const auto length = rand(rng_, 0, maxLength_); + std::vector elements; + elements.reserve(length); + for (size_t i = 0; i < length; ++i) { + if UNLIKELY (containAtIndex_.has_value() && *containAtIndex_ == i) { + elements.push_back(containGenerator_->generate()); + } else { + elements.push_back(elementGenerator_->generate()); + } + } + return variant::array(elements); + } + + private: + const size_t maxLength_; + + std::unique_ptr elementGenerator_; + + std::optional containAtIndex_; + + std::unique_ptr containGenerator_; +}; + +template +class RandomInputGenerator>> + : public AbstractInputGenerator { + public: + RandomInputGenerator>>( + size_t seed, + const TypePtr& type, + double nullRatio, + size_t maxLength = 10, + std::unique_ptr&& keyGenerator = nullptr, + std::unique_ptr&& valueGenerator = nullptr, + std::optional containAtIndex = std::nullopt, + std::unique_ptr&& containKeyGenerator = nullptr, + std::unique_ptr&& containValueGenerator = nullptr) + : AbstractInputGenerator(seed, type, nullptr, nullRatio), + maxLength_{maxLength}, + keyGenerator_{ + keyGenerator + ? std::move(keyGenerator) + : getRandomInputGenerator(seed, type->childAt(0), 0.0)}, + valueGenerator_{ + valueGenerator + ? std::move(valueGenerator) + : getRandomInputGenerator(seed, type->childAt(1), nullRatio)}, + containAtIndex_{containAtIndex}, + containKeyGenerator_{std::move(containKeyGenerator)}, + containValueGenerator_{std::move(containValueGenerator)} {} + + ~RandomInputGenerator>>() + override = default; + + variant generate() override { + if (coinToss(rng_, nullRatio_)) { + return variant::null(TypeKind::MAP); + } + + const auto length = rand(rng_, 0, maxLength_); + std::map map; + for (size_t i = 0; i < length; ++i) { + if UNLIKELY (containAtIndex_.has_value() && *containAtIndex_ == i) { + map.emplace( + containKeyGenerator_->generate(), + containValueGenerator_->generate()); + } else { + map.emplace(keyGenerator_->generate(), valueGenerator_->generate()); + } + } + return variant::map(map); + } + + private: + const size_t maxLength_; + + std::unique_ptr keyGenerator_; + + std::unique_ptr valueGenerator_; + + std::optional containAtIndex_; + + std::unique_ptr containKeyGenerator_; + + std::unique_ptr containValueGenerator_; +}; + +template +class RandomInputGenerator>> + : public AbstractInputGenerator { + public: + RandomInputGenerator>>( + size_t seed, + const TypePtr& type, + std::vector> fieldGenerators, + double nullRatio) + : AbstractInputGenerator(seed, type, nullptr, nullRatio) { + const auto length = type->size(); + fieldGenerators_ = std::move(fieldGenerators); + for (size_t i = 0; i < length; ++i) { + if (fieldGenerators_.size() <= i) { + fieldGenerators_.push_back( + getRandomInputGenerator(seed, type->childAt(i), nullRatio)); + } else if (fieldGenerators_[i] == nullptr) { + fieldGenerators_[i] = + getRandomInputGenerator(seed, type->childAt(i), nullRatio); + } + } + } + + ~RandomInputGenerator>>() + override = default; + + variant generate() override { + if (coinToss(rng_, nullRatio_)) { + return variant::null(TypeKind::ROW); + } + + const auto length = type_->size(); + std::vector fields; + fields.reserve(length); + for (size_t i = 0; i < length; ++i) { + fields.push_back(fieldGenerators_[i]->generate()); + } + return variant::row(fields); + } + + private: + std::vector> fieldGenerators_; +}; + +template , int> = 0> +class RangeConstrainedGenerator : public AbstractInputGenerator { + public: + RangeConstrainedGenerator( + size_t seed, + const TypePtr& type, + double nullRatio, + T min, + T max) + : AbstractInputGenerator(seed, type, nullptr, nullRatio), + min_{min}, + max_{max} {} + + ~RangeConstrainedGenerator() override = default; + + variant generate() override { + if (coinToss(rng_, nullRatio_)) { + return variant::null(type_->kind()); + } + return variant(rand(rng_, min_, max_)); + } + + private: + T min_; + T max_; +}; + +class NotEqualConstrainedGenerator : public AbstractInputGenerator { + public: + // nullRatio doesn't affect the data generation because it is 'next' that + // generates data. + NotEqualConstrainedGenerator( + size_t seed, + const TypePtr& type, + const variant& excludedValue, + std::unique_ptr&& next) + : AbstractInputGenerator(seed, type, std::move(next), 0.0), + excludedValue_{excludedValue} {} + + ~NotEqualConstrainedGenerator() override = default; + + variant generate() override; + + private: + variant excludedValue_; +}; + +class SetConstrainedGenerator : public AbstractInputGenerator { + public: + // nullRatio doesn't affect the data generation because only variants in 'set' + // can be generated. + SetConstrainedGenerator( + size_t seed, + const TypePtr& type, + const std::vector& set) + : AbstractInputGenerator(seed, type, nullptr, 0.0), set_{set} {} + + ~SetConstrainedGenerator() override = default; + + variant generate() override; + + private: + std::vector set_; +}; + +class JsonInputGenerator : public AbstractInputGenerator { + public: + JsonInputGenerator( + size_t seed, + const TypePtr& type, + double nullRatio, + std::unique_ptr&& objectGenerator, + bool makeRandomVariation = false) + : AbstractInputGenerator(seed, type, nullptr, nullRatio), + objectGenerator_{std::move(objectGenerator)}, + makeRandomVariation_{makeRandomVariation}, + opts_{getSerializationOptions()} {} + + ~JsonInputGenerator() override = default; + + variant generate() override; + + const folly::json::serialization_opts& serializationOptions() const { + return opts_; + } + + private: + template + folly::dynamic convertVariantToDynamicPrimitive(const variant& v) { + using T = typename TypeTraits::DeepCopiedType; + VELOX_CHECK(v.isSet()); + const T value = v.value(); + return folly::dynamic(value); + } + + folly::dynamic convertVariantToDynamic(const variant& object); + + void makeRandomVariation(std::string json); + + folly::json::serialization_opts getSerializationOptions(); + + std::unique_ptr objectGenerator_; + + bool makeRandomVariation_; + + folly::json::serialization_opts opts_; +}; + +} // namespace facebook::velox::fuzzer diff --git a/velox/vector/fuzzer/ConstrainedVectorGenerator.cpp b/velox/vector/fuzzer/ConstrainedVectorGenerator.cpp new file mode 100644 index 000000000000..1dc382bd05b0 --- /dev/null +++ b/velox/vector/fuzzer/ConstrainedVectorGenerator.cpp @@ -0,0 +1,149 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/vector/fuzzer/ConstrainedVectorGenerator.h" + +#include "velox/expression/VectorWriters.h" + +namespace facebook::velox::fuzzer { + +using exec::GenericWriter; +using exec::VectorWriter; + +// static +VectorPtr ConstrainedVectorGenerator::generateConstant( + const std::shared_ptr& customGenerator, + vector_size_t size, + memory::MemoryPool* pool) { + VELOX_CHECK_NOT_NULL(customGenerator); + VELOX_CHECK(customGenerator->type()->isPrimitiveType()); + + const auto& type = customGenerator->type(); + const auto variant = customGenerator->generate(); + + return BaseVector::createConstant(type, variant, size, pool); +} + +template +void writeOne(const variant& value, GenericWriter& writer); +template <> +void writeOne(const variant& value, GenericWriter& writer); +template <> +void writeOne(const variant& value, GenericWriter& writer); +template <> +void writeOne(const variant& value, GenericWriter& writer); +template <> +void writeOne(const variant& value, GenericWriter& writer); +template <> +void writeOne(const variant& value, GenericWriter& writer); + +template +void writeOne(const variant& value, GenericWriter& writer) { + using T = typename TypeTraits::NativeType; + writer.template castTo() = value.value(); +} + +template <> +void writeOne(const variant& value, GenericWriter& writer) { + writer.template castTo() = value.value(); +} + +template <> +void writeOne( + const variant& value, + GenericWriter& writer) { + writer.template castTo() = value.value(); +} + +template <> +void writeOne(const variant& value, GenericWriter& writer) { + auto& writerTyped = writer.template castTo>(); + const auto& elements = value.array(); + for (const auto& element : elements) { + if (element.isNull()) { + writerTyped.add_null(); + } else { + VELOX_DYNAMIC_TYPE_DISPATCH( + writeOne, element.kind(), element, writerTyped.add_item()); + } + } +} + +template <> +void writeOne(const variant& value, GenericWriter& writer) { + auto& writerTyped = writer.template castTo>(); + const auto& map = value.map(); + for (const auto& pair : map) { + const auto& key = pair.first; + const auto& value = pair.second; + VELOX_CHECK(!key.isNull()); + if (value.isNull()) { + VELOX_DYNAMIC_TYPE_DISPATCH( + writeOne, key.kind(), key, writerTyped.add_null()); + } else { + auto writers = writerTyped.add_item(); + VELOX_DYNAMIC_TYPE_DISPATCH( + writeOne, key.kind(), key, std::get<0>(writers)); + VELOX_DYNAMIC_TYPE_DISPATCH( + writeOne, value.kind(), value, std::get<1>(writers)); + } + } +} + +template <> +void writeOne(const variant& value, GenericWriter& writer) { + auto& writerTyped = writer.template castTo(); + const auto& elements = value.row(); + column_index_t i = 0; + for (const auto& element : elements) { + if (element.isNull()) { + writerTyped.set_null_at(i); + } else { + VELOX_DYNAMIC_TYPE_DISPATCH( + writeOne, element.kind(), element, writerTyped.get_writer_at(i)); + } + i++; + } +} + +// static +VectorPtr ConstrainedVectorGenerator::generateFlat( + const std::shared_ptr& customGenerator, + vector_size_t size, + memory::MemoryPool* pool) { + VELOX_CHECK_NOT_NULL(customGenerator); + + VectorPtr result; + const auto& type = customGenerator->type(); + BaseVector::ensureWritable(SelectivityVector(size), type, pool, result); + VectorWriter writer; + writer.init(*result); + + for (auto i = 0; i < size; ++i) { + writer.setOffset(i); + const auto variant = customGenerator->generate(); + if (variant.isNull()) { + writer.commitNull(); + } else { + VELOX_DYNAMIC_TYPE_DISPATCH( + writeOne, type->kind(), variant, writer.current()); + writer.commit(true); + } + } + return result; +} + +} // namespace facebook::velox::fuzzer diff --git a/velox/vector/fuzzer/ConstrainedVectorGenerator.h b/velox/vector/fuzzer/ConstrainedVectorGenerator.h new file mode 100644 index 000000000000..5a887f26b453 --- /dev/null +++ b/velox/vector/fuzzer/ConstrainedVectorGenerator.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "velox/vector/fuzzer/ConstrainedGenerators.h" + +namespace facebook::velox::fuzzer { + +class ConstrainedVectorGenerator { + public: + ConstrainedVectorGenerator() = delete; + + static VectorPtr generateConstant( + const std::shared_ptr& customGenerator, + vector_size_t size, + memory::MemoryPool* pool); + + static VectorPtr generateFlat( + const std::shared_ptr& customGenerator, + vector_size_t size, + memory::MemoryPool* pool); +}; + +} // namespace facebook::velox::fuzzer diff --git a/velox/vector/fuzzer/Utils.cpp b/velox/vector/fuzzer/Utils.cpp index 805fcf1063d7..d63695c323a3 100644 --- a/velox/vector/fuzzer/Utils.cpp +++ b/velox/vector/fuzzer/Utils.cpp @@ -16,13 +16,92 @@ #include "velox/vector/fuzzer/Utils.h" -namespace facebook::velox::generator_spec_utils { +namespace facebook::velox { bool coinToss(FuzzerGenerator& rng, double threshold) { static std::uniform_real_distribution<> dist(0.0, 1.0); return dist(rng) < threshold; } +Timestamp randTimestamp( + FuzzerGenerator& rng, + FuzzerTimestampPrecision timestampPrecision) { + // Generate timestamps only in the valid range to avoid datetime functions, + // such as try_cast(varchar as timestamp), throwing VeloxRuntimeError in + // fuzzers. + constexpr int64_t min = -2'140'671'600; + constexpr int64_t max = 2'140'671'600; + constexpr int64_t microInSecond = 1'000'000; + constexpr int64_t millisInSecond = 1'000; + // DWRF requires nano to be in a certain range. Hardcode the value here to + // avoid the dependency on DWRF. + constexpr int64_t MAX_NANOS = 1'000'000'000; + + switch (timestampPrecision) { + case FuzzerTimestampPrecision::kNanoSeconds: + return Timestamp( + rand(rng, min, max), (rand(rng) % MAX_NANOS)); + case FuzzerTimestampPrecision::kMicroSeconds: + return Timestamp::fromMicros( + rand(rng, min, max) * microInSecond + + rand(rng, -microInSecond, microInSecond)); + case FuzzerTimestampPrecision::kMilliSeconds: + return Timestamp::fromMillis( + rand(rng, min, max) * millisInSecond + + rand(rng, -millisInSecond, millisInSecond)); + case FuzzerTimestampPrecision::kSeconds: + return Timestamp(rand(rng, min, max), 0); + } + return {}; // no-op. +} + +int32_t randDate(FuzzerGenerator& rng) { + constexpr int64_t min = -24'450; + constexpr int64_t max = 24'450; + return rand(rng, min, max); +} + +FOLLY_ALWAYS_INLINE char16_t getRandomChar( + FuzzerGenerator& rng, + const std::vector>& charSet) { + const auto& chars = charSet.size() == 1 + ? charSet.front() + : charSet[rand(rng) % charSet.size()]; + auto size = chars.second - chars.first; + auto inc = (rand(rng) % size); + char16_t res = chars.first + inc; + return res; +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +std::string randString( + FuzzerGenerator& rng, + size_t length, + const std::vector& encodings, + std::string& buf, + std::wstring_convert, char16_t>& converter) { + buf.clear(); + std::u16string wbuf; + wbuf.resize(length); + + for (size_t i = 0; i < length; ++i) { + // First choose a random encoding from the list of input acceptable + // encodings. + VELOX_CHECK_GE(encodings.size(), 1); + const auto& encoding = (encodings.size() == 1) + ? encodings.front() + : encodings[rand(rng) % encodings.size()]; + + wbuf[i] = getRandomChar(rng, kUTFChatSets[encoding]); + } + buf.append(converter.to_bytes(wbuf)); + return buf; +} +#pragma GCC diagnostic pop + +namespace generator_spec_utils { + vector_size_t getRandomIndex(FuzzerGenerator& rng, vector_size_t maxIndex) { std::uniform_int_distribution indexGenerator( 0, maxIndex); // generates index in [0, maxIndex] @@ -59,4 +138,6 @@ BufferPtr generateIndicesBuffer( return indices; } -} // namespace facebook::velox::generator_spec_utils +} // namespace generator_spec_utils + +} // namespace facebook::velox diff --git a/velox/vector/fuzzer/Utils.h b/velox/vector/fuzzer/Utils.h index 0248b08f942f..8fcbcc330a0b 100644 --- a/velox/vector/fuzzer/Utils.h +++ b/velox/vector/fuzzer/Utils.h @@ -16,6 +16,13 @@ #pragma once +#include +#include + +#include +#include +#include + #include "velox/vector/BaseVector.h" #include "velox/vector/NullsBuilder.h" @@ -23,6 +30,57 @@ namespace facebook::velox { using FuzzerGenerator = std::mt19937; +enum UTF8CharList { + ASCII = 0, // Ascii character set. + UNICODE_CASE_SENSITIVE = 1, // Unicode scripts that support case. + EXTENDED_UNICODE = 2, // Extended Unicode: Arabic, Devanagiri etc + MATHEMATICAL_SYMBOLS = 3 // Mathematical Symbols. +}; + +/// Unicode character ranges. Ensure the vector indexes match the UTF8CharList +/// enum values. +/// +/// Source: https://jrgraphix.net/research/unicode_blocks.php +static const std::vector>> + kUTFChatSets{ + // UTF8CharList::ASCII + { + {33, 127}, // All ASCII printable chars. + }, + // UTF8CharList::UNICODE_CASE_SENSITIVE + { + {u'\u0020', u'\u007F'}, // Basic Latin. + {u'\u0400', u'\u04FF'}, // Cyrillic. + }, + // UTF8CharList::EXTENDED_UNICODE + { + {u'\u03F0', u'\u03FF'}, // Greek. + {u'\u0100', u'\u017F'}, // Latin Extended A. + {u'\u0600', u'\u06FF'}, // Arabic. + {u'\u0900', u'\u097F'}, // Devanagari. + {u'\u0600', u'\u06FF'}, // Hebrew. + {u'\u3040', u'\u309F'}, // Hiragana. + {u'\u2000', u'\u206F'}, // Punctuation. + {u'\u2070', u'\u209F'}, // Sub/Super Script. + {u'\u20A0', u'\u20CF'}, // Currency. + }, + // UTF8CharList::MATHEMATICAL_SYMBOLS + { + {u'\u2200', u'\u22FF'}, // Math Operators. + {u'\u2150', u'\u218F'}, // Number Forms. + {u'\u25A0', u'\u25FF'}, // Geometric Shapes. + {u'\u27C0', u'\u27EF'}, // Math Symbols. + {u'\u2A00', u'\u2AFF'}, // Supplemental. + }, + }; + +bool coinToss(FuzzerGenerator& rng, double threshold); + +struct DataSpec { + bool includeNaN; + bool includeInfinity; +}; + enum class FuzzerTimestampPrecision : int8_t { kNanoSeconds = 0, kMicroSeconds = 1, @@ -30,9 +88,114 @@ enum class FuzzerTimestampPrecision : int8_t { kSeconds = 3, }; -namespace generator_spec_utils { +// Generate random values for the different supported types. +template +inline T rand(FuzzerGenerator& rng, DataSpec dataSpec = {false, false}) { + VELOX_NYI(); +} -bool coinToss(FuzzerGenerator& rng, double threshold); +template <> +inline int8_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution()(rng); +} + +template <> +inline int16_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution()(rng); +} + +template <> +inline int32_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution()(rng); +} + +template <> +inline int64_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution()(rng); +} + +template <> +inline double rand(FuzzerGenerator& rng, DataSpec dataSpec) { + if (dataSpec.includeNaN && coinToss(rng, 0.05)) { + return std::nan(""); + } + + if (dataSpec.includeInfinity && coinToss(rng, 0.05)) { + return std::numeric_limits::infinity(); + } + + return boost::random::uniform_01()(rng); +} + +template <> +inline float rand(FuzzerGenerator& rng, DataSpec dataSpec) { + if (dataSpec.includeNaN && coinToss(rng, 0.05)) { + return std::nanf(""); + } + + if (dataSpec.includeInfinity && coinToss(rng, 0.05)) { + return std::numeric_limits::infinity(); + } + + return boost::random::uniform_01()(rng); +} + +template <> +inline bool rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution(0, 1)(rng); +} + +template <> +inline uint32_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution()(rng); +} + +template <> +inline uint64_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution()(rng); +} + +template <> +inline int128_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return HugeInt::build(rand(rng), rand(rng)); +} + +Timestamp randTimestamp( + FuzzerGenerator& rng, + FuzzerTimestampPrecision timestampPrecision); + +template <> +inline Timestamp rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + // TODO: support other timestamp precisions. + return randTimestamp(rng, FuzzerTimestampPrecision::kMicroSeconds); +} + +int32_t randDate(FuzzerGenerator& rng); + +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T rand(FuzzerGenerator& rng, T min, T max) { + if constexpr (std::is_integral_v) { + return boost::random::uniform_int_distribution(min, max)(rng); + } else { + return boost::random::uniform_real_distribution(min, max)(rng); + } +} + +/// Generates a random string in buf with characters of encodings. Return buf at +/// the end. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +std::string randString( + FuzzerGenerator& rng, + size_t length, + const std::vector& encodings, + std::string& buf, + std::wstring_convert, char16_t>& converter); +#pragma GCC diagnostic pop + +namespace generator_spec_utils { vector_size_t getRandomIndex(FuzzerGenerator& rng, vector_size_t maxIndex); diff --git a/velox/vector/fuzzer/VectorFuzzer.cpp b/velox/vector/fuzzer/VectorFuzzer.cpp index 56cd03a27c89..8f89eb2a09a7 100644 --- a/velox/vector/fuzzer/VectorFuzzer.cpp +++ b/velox/vector/fuzzer/VectorFuzzer.cpp @@ -27,15 +27,12 @@ #include "velox/vector/FlatVector.h" #include "velox/vector/NullsBuilder.h" #include "velox/vector/VectorTypeUtils.h" +#include "velox/vector/fuzzer/Utils.h" namespace facebook::velox { namespace { -// DWRF requires nano to be in a certain range. Hardcode the value here to avoid -// the dependency on DWRF. -constexpr int64_t MAX_NANOS = 1'000'000'000; - // Structure to help temporary changes to Options. This objects saves the // current state of the Options object, and restores it when it's destructed. // For instance, if you would like to temporarily disable nulls for a particular @@ -62,116 +59,6 @@ struct ScopedOptions { VectorFuzzer::Options savedOpts; }; -// Generate random values for the different supported types. -template -T rand(FuzzerGenerator& rng, DataSpec dataSpec = {false, false}) { - VELOX_NYI(); -} - -template <> -int8_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution()(rng); -} - -template <> -int16_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution()(rng); -} - -template <> -int32_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution()(rng); -} - -template <> -int64_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution()(rng); -} - -template <> -double rand(FuzzerGenerator& rng, DataSpec dataSpec) { - if (dataSpec.includeNaN && coinToss(rng, 0.05)) { - return std::nan(""); - } - - if (dataSpec.includeInfinity && coinToss(rng, 0.05)) { - return std::numeric_limits::infinity(); - } - - return boost::random::uniform_01()(rng); -} - -template <> -float rand(FuzzerGenerator& rng, DataSpec dataSpec) { - if (dataSpec.includeNaN && coinToss(rng, 0.05)) { - return std::nanf(""); - } - - if (dataSpec.includeInfinity && coinToss(rng, 0.05)) { - return std::numeric_limits::infinity(); - } - - return boost::random::uniform_01()(rng); -} - -template <> -bool rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution(0, 1)(rng); -} - -template <> -uint32_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution()(rng); -} - -template <> -uint64_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution()(rng); -} - -template <> -int128_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return HugeInt::build(rand(rng), rand(rng)); -} - -template , int> = 0> -T rand(FuzzerGenerator& rng, T min, T max) { - return boost::random::uniform_int_distribution(min, max)(rng); -} - -Timestamp randTimestamp(FuzzerGenerator& rng, VectorFuzzer::Options opts) { - // Generate timestamps only in the valid range to avoid datetime functions, - // such as try_cast(varchar as timestamp), throwing VeloxRuntimeError in - // fuzzers. - constexpr int64_t min = -2'140'671'600; - constexpr int64_t max = 2'140'671'600; - constexpr int64_t microInSecond = 1'000'000; - constexpr int64_t millisInSecond = 1'000; - - switch (opts.timestampPrecision) { - case VectorFuzzer::Options::TimestampPrecision::kNanoSeconds: - return Timestamp( - rand(rng, min, max), (rand(rng) % MAX_NANOS)); - case VectorFuzzer::Options::TimestampPrecision::kMicroSeconds: - return Timestamp::fromMicros( - rand(rng, min, max) * microInSecond + - rand(rng, -microInSecond, microInSecond)); - case VectorFuzzer::Options::TimestampPrecision::kMilliSeconds: - return Timestamp::fromMillis( - rand(rng, min, max) * millisInSecond + - rand(rng, -millisInSecond, millisInSecond)); - case VectorFuzzer::Options::TimestampPrecision::kSeconds: - return Timestamp(rand(rng, min, max), 0); - } - return {}; // no-op. -} - -int32_t randDate(FuzzerGenerator& rng) { - constexpr int64_t min = -24'450; - constexpr int64_t max = 24'450; - return rand(rng, min, max); -} - size_t getElementsVectorLength( const VectorFuzzer::Options& opts, vector_size_t size) { @@ -196,43 +83,7 @@ int128_t randLongDecimal(const TypePtr& type, FuzzerGenerator& rng) { return rand(rng) % DecimalUtil::kPowersOfTen[precision]; } -/// Unicode character ranges. Ensure the vector indexes match the UTF8CharList -/// enum values. -/// -/// Source: https://jrgraphix.net/research/unicode_blocks.php -const std::vector>> kUTFChatSets{ - // UTF8CharList::ASCII - { - {33, 127}, // All ASCII printable chars. - }, - // UTF8CharList::UNICODE_CASE_SENSITIVE - { - {u'\u0020', u'\u007F'}, // Basic Latin. - {u'\u0400', u'\u04FF'}, // Cyrillic. - }, - // UTF8CharList::EXTENDED_UNICODE - { - {u'\u03F0', u'\u03FF'}, // Greek. - {u'\u0100', u'\u017F'}, // Latin Extended A. - {u'\u0600', u'\u06FF'}, // Arabic. - {u'\u0900', u'\u097F'}, // Devanagari. - {u'\u0600', u'\u06FF'}, // Hebrew. - {u'\u3040', u'\u309F'}, // Hiragana. - {u'\u2000', u'\u206F'}, // Punctuation. - {u'\u2070', u'\u209F'}, // Sub/Super Script. - {u'\u20A0', u'\u20CF'}, // Currency. - }, - // UTF8CharList::MATHEMATICAL_SYMBOLS - { - {u'\u2200', u'\u22FF'}, // Math Operators. - {u'\u2150', u'\u218F'}, // Number Forms. - {u'\u25A0', u'\u25FF'}, // Geometric Shapes. - {u'\u27C0', u'\u27EF'}, // Math Symbols. - {u'\u2A00', u'\u2AFF'}, // Supplemental. - }, -}; - -FOLLY_ALWAYS_INLINE char16_t getRandomChar( +/*FOLLY_ALWAYS_INLINE char16_t getRandomChar( FuzzerGenerator& rng, const std::vector>& charSet) { const auto& chars = charSet.size() == 1 @@ -242,7 +93,7 @@ FOLLY_ALWAYS_INLINE char16_t getRandomChar( auto inc = (rand(rng) % size); char16_t res = chars.first + inc; return res; -} +}*/ /// Generates a random string (string size and encoding are passed through /// Options). Returns a StringView which uses `buf` as the underlying buffer. @@ -251,24 +102,11 @@ StringView randString( const VectorFuzzer::Options& opts, std::string& buf, std::wstring_convert, char16_t>& converter) { - buf.clear(); - std::u16string wbuf; const size_t stringLength = opts.stringVariableLength ? rand(rng) % opts.stringLength : opts.stringLength; - wbuf.resize(stringLength); - - for (size_t i = 0; i < stringLength; ++i) { - // First choose a random encoding from the list of input acceptable - // encodings. - const auto& encoding = (opts.charEncodings.size() == 1) - ? opts.charEncodings.front() - : opts.charEncodings[rand(rng) % opts.charEncodings.size()]; - - wbuf[i] = getRandomChar(rng, kUTFChatSets[encoding]); - } - buf.append(converter.to_bytes(wbuf)); + randString(rng, stringLength, opts.charEncodings, buf, converter); return StringView(buf); } @@ -290,7 +128,7 @@ VectorPtr fuzzConstantPrimitiveImpl( } if constexpr (std::is_same_v) { return std::make_shared>( - pool, size, false, type, randTimestamp(rng, opts)); + pool, size, false, type, randTimestamp(rng, opts.timestampPrecision)); } else if (type->isDate()) { return std::make_shared>( pool, size, false, type, randDate(rng)); @@ -322,7 +160,7 @@ void fuzzFlatPrimitiveImpl( if constexpr (std::is_same_v) { flatVector->set(i, randString(rng, opts, strBuf, converter)); } else if constexpr (std::is_same_v) { - flatVector->set(i, randTimestamp(rng, opts)); + flatVector->set(i, randTimestamp(rng, opts.timestampPrecision)); } else if constexpr (std::is_same_v) { if (vector->type()->isShortDecimal()) { flatVector->set(i, randShortDecimal(vector->type(), rng)); diff --git a/velox/vector/fuzzer/VectorFuzzer.h b/velox/vector/fuzzer/VectorFuzzer.h index 0cd9a510305c..665039f34fee 100644 --- a/velox/vector/fuzzer/VectorFuzzer.h +++ b/velox/vector/fuzzer/VectorFuzzer.h @@ -26,18 +26,6 @@ namespace facebook::velox { -enum UTF8CharList { - ASCII = 0, // Ascii character set. - UNICODE_CASE_SENSITIVE = 1, // Unicode scripts that support case. - EXTENDED_UNICODE = 2, // Extended Unicode: Arabic, Devanagiri etc - MATHEMATICAL_SYMBOLS = 3 // Mathematical Symbols. -}; - -struct DataSpec { - bool includeNaN; - bool includeInfinity; -}; - const std::vector& defaultScalarTypes(); /// VectorFuzzer is a helper class that generates randomized vectors and their diff --git a/velox/vector/fuzzer/tests/CMakeLists.txt b/velox/vector/fuzzer/tests/CMakeLists.txt index 4b8fdb65f5d0..d984e2118801 100644 --- a/velox/vector/fuzzer/tests/CMakeLists.txt +++ b/velox/vector/fuzzer/tests/CMakeLists.txt @@ -22,3 +22,18 @@ target_link_libraries( GTest::gtest GTest::gtest_main GTest::gmock) + +add_executable(velox_fuzzer_constrained_input_generators_test + ConstrainedGeneratorsTest.cpp) + +add_test( + NAME velox_fuzzer_constrained_input_generators_test + COMMAND velox_fuzzer_constrained_input_generators_test + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + +target_link_libraries( + velox_fuzzer_constrained_input_generators_test + velox_fuzzer_constrained_input_generators + velox_presto_types + velox_type + velox_vector_test_lib) diff --git a/velox/vector/fuzzer/tests/ConstrainedGeneratorsTest.cpp b/velox/vector/fuzzer/tests/ConstrainedGeneratorsTest.cpp new file mode 100644 index 000000000000..8d1dfc17a652 --- /dev/null +++ b/velox/vector/fuzzer/tests/ConstrainedGeneratorsTest.cpp @@ -0,0 +1,375 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/vector/fuzzer/ConstrainedGenerators.h" + +#include + +#include "velox/functions/prestosql/types/JsonType.h" +#include "velox/type/Variant.h" +#include "velox/vector/fuzzer/ConstrainedVectorGenerator.h" +#include "velox/vector/tests/utils/VectorTestBase.h" + +namespace facebook::velox::fuzzer::test { + +class ConstrainedGeneratorsTest : public testing::Test, + public velox::test::VectorTestBase { + protected: + static void SetUpTestCase() { + memory::MemoryManager::testingSetInstance({}); + } + + template + void testRandomPrimitive(const TypePtr& type, bool testNull) { + VELOX_CHECK_EQ(type->kind(), KIND); + using T = typename TypeTraits::NativeType; + + std::unique_ptr generator = + std::make_unique>( + 0, type, testNull ? 1.0 : 0.0); + auto value = generator->generate(); + if (testNull) { + EXPECT_FALSE(value.hasValue()); + } else { + EXPECT_TRUE(value.hasValue()); + } + EXPECT_EQ(value.kind(), KIND); + } + + template + void testRandomComplex(const TypePtr& type, bool testNull) { + VELOX_CHECK_EQ(type->kind(), KIND); + using T = typename TypeTraits::ImplType; + + std::unique_ptr generator = + std::make_unique>( + 0, type, testNull ? 1.0 : 0.0); + auto value = generator->generate(); + EXPECT_EQ(value.kind(), KIND); + if (testNull) { + EXPECT_FALSE(value.hasValue()); + } else { + EXPECT_TRUE(value.hasValue()); + } + } + + template + void testRangePrimitive(const TypePtr& type, T min, T max, bool testNull) { + std::unique_ptr generator = + std::make_unique>( + 0, type, testNull ? 1.0 : 0.0, min, max); + + const uint32_t kIterations = 1000; + const variant lower = variant(min); + const variant upper = variant(max); + for (uint32_t i = 0; i < kIterations; ++i) { + auto value = generator->generate(); + EXPECT_EQ(value.kind(), type->kind()); + if (testNull) { + EXPECT_FALSE(value.hasValue()); + } else { + EXPECT_TRUE(value.hasValue()); + EXPECT_TRUE(lower < value || lower == value); + EXPECT_TRUE(value < upper || value == upper); + } + } + } + + template + void testRangeInComplex(const TypePtr& type, T min, T max, bool testNull) { + EXPECT_TRUE(type->isArray() && type->childAt(0)->isPrimitiveType()); + std::unique_ptr generator = + std::make_unique>( + 0, + type, + 0.0, + 1000, + std::make_unique>( + 0, type, testNull ? 1.0 : 0.0, min, max)); + + const auto value = generator->generate(); + EXPECT_EQ(value.kind(), TypeKind::ARRAY); + EXPECT_TRUE(value.hasValue()); + + const auto elements = value.array(); + const variant lower = variant(min); + const variant upper = variant(max); + for (const auto& element : elements) { + if (testNull) { + EXPECT_FALSE(element.hasValue()); + } else { + EXPECT_TRUE(element.hasValue()); + EXPECT_TRUE(lower < element || lower == element); + EXPECT_TRUE(element < upper || element == upper); + } + } + } + + template + void testNotEqualPrimitive( + const TypePtr& type, + const TValue& excludedValue, + bool testNull) { + VELOX_CHECK_EQ(type->kind(), KIND); + using T = typename TypeTraits::NativeType; + + variant excludedVariant{excludedValue}; + std::unique_ptr generator = + std::make_unique( + 0, + type, + excludedVariant, + std::make_unique>( + 0, type, testNull ? 1.0 : 0.0)); + + const uint32_t kIterations = 1000; + for (uint32_t i = 0; i < kIterations; ++i) { + auto value = generator->generate(); + EXPECT_EQ(value.kind(), KIND); + if (testNull) { + EXPECT_FALSE(value.hasValue()); + } else { + EXPECT_TRUE(value.hasValue()); + EXPECT_NE(value, excludedVariant); + } + } + } + + template + void testNotEqualComplex( + const TypePtr& type, + const variant& excludedVariant, + bool testNull) { + VELOX_CHECK_EQ(type->kind(), KIND); + using T = typename TypeTraits::ImplType; + + std::unique_ptr generator = + std::make_unique( + 0, + type, + excludedVariant, + std::make_unique>( + 0, type, testNull ? 1.0 : 0.0)); + + const uint32_t kIterations = 1000; + for (uint32_t i = 0; i < kIterations; ++i) { + auto value = generator->generate(); + EXPECT_EQ(value.kind(), KIND); + if (testNull) { + EXPECT_FALSE(value.hasValue()); + } else { + EXPECT_TRUE(value.hasValue()); + EXPECT_NE(value, excludedVariant); + } + } + } + + template + void testSetPrimitive(const TypePtr& type, const TSet& setOfRawValues) { + VELOX_CHECK_EQ(type->kind(), KIND); + using T = typename TypeTraits::NativeType; + + const uint32_t kIterations = 1000; + std::vector variants; + for (const auto& value : setOfRawValues) { + variants.push_back(variant{value}); + } + std::unique_ptr generator = + std::make_unique(0, type, variants); + + for (uint32_t i = 0; i < kIterations; ++i) { + auto value = generator->generate(); + EXPECT_TRUE(value.hasValue()); + EXPECT_EQ(value.kind(), KIND); + EXPECT_NE(setOfRawValues.count(value.value()), 0); + } + } + + template + void testSetComplex( + const TypePtr& type, + const std::vector& variants) { + VELOX_CHECK_EQ(type->kind(), KIND); + using T = typename TypeTraits::ImplType; + + std::set setOfVariants{variants.begin(), variants.end()}; + + std::unique_ptr generator = + std::make_unique(0, type, variants); + + const uint32_t kIterations = 1000; + for (uint32_t i = 0; i < kIterations; ++i) { + auto value = generator->generate(); + EXPECT_TRUE(value.hasValue()); + EXPECT_EQ(value.kind(), KIND); + EXPECT_NE(setOfVariants.count(value), 0); + } + } + + template + void testGenerateVectorsPrimitive( + const TypePtr& type, + const variant& excludedValue) { + using T = typename TypeTraits::NativeType; + const uint32_t kSize = 1000; + std::shared_ptr generator = + std::make_shared( + 0, + type, + excludedValue, + std::make_unique>(0, type, 0.5)); + auto vector = + ConstrainedVectorGenerator::generateConstant(generator, kSize, pool()); + EXPECT_EQ(vector->size(), kSize); + EXPECT_EQ(vector->typeKind(), KIND); + EXPECT_TRUE(vector->isConstantEncoding()); + EXPECT_TRUE( + vector->isNullAt(0) || + vector->as>()->valueAt(0) != excludedValue); + + vector = ConstrainedVectorGenerator::generateFlat(generator, kSize, pool()); + EXPECT_EQ(vector->size(), kSize); + EXPECT_EQ(vector->typeKind(), KIND); + EXPECT_TRUE(vector->isFlatEncoding()); + bool hasNull = false; + for (auto i = 0; i < kSize; ++i) { + if (vector->isNullAt(i)) { + hasNull = true; + } else { + EXPECT_NE(vector->as>()->valueAt(i), excludedValue); + } + } + EXPECT_TRUE(hasNull); + } + + template + void testGenerateVectorsComplex(const TypePtr& type) { + using T = typename TypeTraits::ImplType; + const uint32_t kSize = 1000; + std::shared_ptr generator = + std::make_shared>(0, type, 0.5); + auto vector = + ConstrainedVectorGenerator::generateFlat(generator, kSize, pool()); + EXPECT_EQ(vector->size(), kSize); + EXPECT_EQ(vector->type(), type); + } +}; + +TEST_F(ConstrainedGeneratorsTest, randomPrimitive) { + testRandomPrimitive(INTEGER(), false); + testRandomPrimitive(VARCHAR(), false); + + testRandomPrimitive(INTEGER(), true); + testRandomPrimitive(VARCHAR(), true); +} + +TEST_F(ConstrainedGeneratorsTest, randomComplex) { + testRandomComplex( + ARRAY(MAP(VARCHAR(), ROW({BIGINT()}))), false); + testRandomComplex( + ARRAY(MAP(VARCHAR(), ROW({BIGINT()}))), true); +} + +TEST_F(ConstrainedGeneratorsTest, rangePrimitive) { + testRangePrimitive(BIGINT(), 1, 10, false); + testRangePrimitive(BIGINT(), 1, 10, true); + + testRangePrimitive(REAL(), 1.0, 10.0, false); + testRangePrimitive(REAL(), 1.0, 10.0, true); +} + +TEST_F(ConstrainedGeneratorsTest, rangeInComplex) { + testRangeInComplex(ARRAY(BIGINT()), 1, 10, false); + testRangeInComplex(ARRAY(BIGINT()), 1, 10, true); + + testRangeInComplex(ARRAY(REAL()), 1.0, 10.0, false); + testRangeInComplex(ARRAY(REAL()), 1.0, 10.0, true); +} + +TEST_F(ConstrainedGeneratorsTest, notEqPrimitive) { + testNotEqualPrimitive( + TINYINT(), static_cast(1), false); + testNotEqualPrimitive(VARCHAR(), ""_sv, false); + + testNotEqualPrimitive( + TINYINT(), static_cast(1), true); + testNotEqualPrimitive(VARCHAR(), ""_sv, true); +} + +TEST_F(ConstrainedGeneratorsTest, notEqComplex) { + auto excludedVariant = variant::array({variant::map( + {{variant{"1"}, variant::row({variant{static_cast(1)}})}})}); + testNotEqualComplex( + ARRAY(MAP(VARCHAR(), ROW({BIGINT()}))), excludedVariant, false); + testNotEqualComplex( + ARRAY(MAP(VARCHAR(), ROW({BIGINT()}))), excludedVariant, true); +} + +TEST_F(ConstrainedGeneratorsTest, setPrimitive) { + std::unordered_set integers{{1, 2, 3}}; + testSetPrimitive(INTEGER(), integers); + + std::unordered_set strings{{"1", "2", "3"}}; + testSetPrimitive(VARCHAR(), strings); +} + +TEST_F(ConstrainedGeneratorsTest, setComplex) { + std::vector variants{ + variant::array({variant::map( + {{variant{"1"}, variant::row({variant{static_cast(1)}})}})}), + variant::array({variant::map( + {{variant{"2"}, + variant::row({variant{static_cast(2)}})}})})}; + testSetComplex( + ARRAY(MAP(VARCHAR(), ROW({BIGINT()}))), variants); +} + +TEST_F(ConstrainedGeneratorsTest, json) { + const TypePtr type = ARRAY(MAP(DOUBLE(), ROW({BIGINT()}))); + std::unique_ptr generator = + std::make_unique( + 0, + JSON(), + 0.4, + std::make_unique>(0, type, 0.4)); + + const uint32_t kIterations = 1000; + const auto& opts = generator->serializationOptions(); + for (uint32_t i = 0; i < kIterations; ++i) { + auto value = generator->generate(); + EXPECT_EQ(value.kind(), TypeKind::VARCHAR); + if (value.hasValue()) { + EXPECT_TRUE(value.hasValue()); + folly::dynamic json; + auto jsonString = value.value(); + EXPECT_NO_THROW( + json = folly::parseJson(value.value(), opts)); + EXPECT_TRUE(json.isNull() || json.isArray()); + } + } +} + +TEST_F(ConstrainedGeneratorsTest, generateVectors) { + testGenerateVectorsPrimitive(BIGINT(), variant(0)); + testGenerateVectorsPrimitive(VARCHAR(), variant("")); + + testGenerateVectorsComplex( + ARRAY(ROW({MAP(VARCHAR(), BIGINT())}))); + testGenerateVectorsComplex( + MAP(ARRAY(BIGINT()), ROW({VARCHAR()}))); +} + +} // namespace facebook::velox::fuzzer::test