From 1b04b3e423fec43b75d01aa22a8b1743e986ab74 Mon Sep 17 00:00:00 2001 From: Wei He Date: Thu, 7 Nov 2024 12:18:47 -0800 Subject: [PATCH] Constrained input generators for fuzzers (#11368) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/11368 DO NOT REVIEW FOR NOW. This diff adds a prototype of the constrained input generator for fuzzers. These generators can be used to generate random data satisfying given constraints, needed for fuzzers for testing functions that have special requirement on input data, such as cdf functions, subscript functions, etc. Differential Revision: D65101030 --- velox/CMakeLists.txt | 1 + .../fuzzer_input_generator/CMakeLists.txt | 30 ++ .../ConstrainedGenerators.cpp | 221 ++++++++++++ .../ConstrainedGenerators.h | 330 ++++++++++++++++++ .../ConstrainedVectorGenerator.cpp | 149 ++++++++ .../ConstrainedVectorGenerator.h | 38 ++ .../tests/CMakeLists.txt | 28 ++ .../tests/ConstrainedGeneratorsTest.cpp | 260 ++++++++++++++ velox/vector/fuzzer/Utils.cpp | 46 ++- velox/vector/fuzzer/Utils.h | 147 +++++++- velox/vector/fuzzer/VectorFuzzer.cpp | 155 +------- velox/vector/fuzzer/VectorFuzzer.h | 12 - 12 files changed, 1249 insertions(+), 168 deletions(-) create mode 100644 velox/experimental/fuzzer_input_generator/CMakeLists.txt create mode 100644 velox/experimental/fuzzer_input_generator/ConstrainedGenerators.cpp create mode 100644 velox/experimental/fuzzer_input_generator/ConstrainedGenerators.h create mode 100644 velox/experimental/fuzzer_input_generator/ConstrainedVectorGenerator.cpp create mode 100644 velox/experimental/fuzzer_input_generator/ConstrainedVectorGenerator.h create mode 100644 velox/experimental/fuzzer_input_generator/tests/CMakeLists.txt create mode 100644 velox/experimental/fuzzer_input_generator/tests/ConstrainedGeneratorsTest.cpp diff --git a/velox/CMakeLists.txt b/velox/CMakeLists.txt index 06ae8bf1c0533..c0a6206313893 100644 --- a/velox/CMakeLists.txt +++ b/velox/CMakeLists.txt @@ -25,6 +25,7 
@@ add_subdirectory(flag_definitions) add_subdirectory(external/date) add_subdirectory(external/md5) add_subdirectory(external/hdfs) +add_subdirectory(experimental/fuzzer_input_generator) # # examples depend on expression diff --git a/velox/experimental/fuzzer_input_generator/CMakeLists.txt b/velox/experimental/fuzzer_input_generator/CMakeLists.txt new file mode 100644 index 0000000000000..e1a493b85c467 --- /dev/null +++ b/velox/experimental/fuzzer_input_generator/CMakeLists.txt @@ -0,0 +1,30 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +velox_add_library(velox_fuzzer_constrained_input_generators + ConstrainedGenerators.cpp ConstrainedVectorGenerator.cpp) + +velox_link_libraries( + velox_fuzzer_constrained_input_generators + Folly::folly + velox_expression + velox_type + velox_vector_fuzzer_util) +if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + target_compile_options(velox_fuzzer_constrained_input_generators + PRIVATE -Wno-deprecated-declarations) +endif() + +if(${VELOX_BUILD_TESTING}) + add_subdirectory(tests) +endif() diff --git a/velox/experimental/fuzzer_input_generator/ConstrainedGenerators.cpp b/velox/experimental/fuzzer_input_generator/ConstrainedGenerators.cpp new file mode 100644 index 0000000000000..736003a3d2847 --- /dev/null +++ b/velox/experimental/fuzzer_input_generator/ConstrainedGenerators.cpp @@ -0,0 +1,221 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/experimental/fuzzer_input_generator/ConstrainedGenerators.h" + +#include + +#include "velox/vector/fuzzer/Utils.h" + +namespace facebook::velox::fuzzer { + +FOLLY_ALWAYS_INLINE char16_t getRandomChar( + FuzzerGenerator& rng, + const std::vector>& charSet) { + const auto& chars = charSet.size() == 1 + ? charSet.front() + : charSet[rand(rng) % charSet.size()]; + auto size = chars.second - chars.first; + auto inc = (rand(rng) % size); + char16_t res = chars.first + inc; + return res; +} + +/// Generates a random string (string size and encoding are passed through +/// Options). 
+std::string randString( + FuzzerGenerator& rng, + size_t length, + UTF8CharList encoding, + std::wstring_convert, char16_t>& converter) { + std::string buf; + std::u16string wbuf; + wbuf.resize(length); + + for (size_t i = 0; i < length; ++i) { + wbuf[i] = getRandomChar(rng, kUTFChatSets[encoding]); + } + buf.append(converter.to_bytes(wbuf)); + return buf; +} + +// AbstractInputGenerator +AbstractInputGenerator::AbstractInputGenerator( + size_t seed, + const TypePtr& type, + std::unique_ptr&& next) + : type_{type}, next_{std::move(next)} { + rng_.seed(seed); +} + +// NotEqualConstrainedGenerator +variant NotEqualConstrainedGenerator::generate() { + variant value; + do { + value = next_->generate(); + } while (value == excludedValue_); + return value; +} + +// SetConstrainedGenerator +variant SetConstrainedGenerator::generate() { + const auto index = + boost::random::uniform_int_distribution(0, set_.size() - 1)(rng_); + return set_[index]; +} + +// JsonInputGenerator +folly::json::serialization_opts JsonInputGenerator::getSerializationOptions() { + folly::json::serialization_opts opts; + opts.allow_non_string_keys = true; + opts.allow_nan_inf = true; + if (makeRandomVariation_) { + opts.convert_int_keys = rand(rng_); + opts.pretty_formatting = rand(rng_); + opts.pretty_formatting_indent_width = rand(rng_, 0, 4); + opts.encode_non_ascii = rand(rng_); + opts.allow_trailing_comma = rand(rng_); + opts.sort_keys = rand(rng_); + opts.skip_invalid_utf8 = rand(rng_); + opts.parse_numbers_as_strings = rand(rng_); + } + return opts; +} + +variant JsonInputGenerator::generate() { + const auto object = objectGenerator_->generate(); + const folly::dynamic jsonObject = convertVariantToDynamic(object); + const auto jsonString = folly::json::serialize(jsonObject, opts_); + if (makeRandomVariation_ && coinToss(rng_, 0.5)) { + makeRandomVariation(jsonString); + } + return variant(jsonString); +} + +folly::dynamic JsonInputGenerator::convertVariantToDynamic( + const variant& object) 
{ + if (object.isNull()) { + return folly::dynamic(); + } + + switch (object.kind()) { + case TypeKind::BOOLEAN: + return convertVariantToDynamicPrimitive(object); + case TypeKind::TINYINT: + return convertVariantToDynamicPrimitive(object); + case TypeKind::SMALLINT: + return convertVariantToDynamicPrimitive(object); + case TypeKind::INTEGER: + return convertVariantToDynamicPrimitive(object); + case TypeKind::BIGINT: + return convertVariantToDynamicPrimitive(object); + case TypeKind::REAL: + return convertVariantToDynamicPrimitive(object); + case TypeKind::DOUBLE: + return convertVariantToDynamicPrimitive(object); + case TypeKind::VARCHAR: + return convertVariantToDynamicPrimitive(object); + case TypeKind::VARBINARY: + return convertVariantToDynamicPrimitive(object); + case TypeKind::TIMESTAMP: + return convertVariantToDynamicPrimitive(object); + case TypeKind::HUGEINT: + return convertVariantToDynamicPrimitive(object); + case TypeKind::ARRAY: { + folly::dynamic array = folly::dynamic::array; + for (const auto& element : object.value()) { + array.push_back(convertVariantToDynamic(element)); + } + return array; + } + case TypeKind::MAP: { + folly::dynamic map = folly::dynamic::object; + for (const auto& [key, value] : object.value()) { + map[convertVariantToDynamic(key)] = convertVariantToDynamic(value); + } + return map; + } + case TypeKind::ROW: { + folly::dynamic array = folly::dynamic::array; + for (const auto& element : object.value()) { + array.push_back(convertVariantToDynamic(element)); + } + return array; + } + default: + VELOX_UNREACHABLE("Unsupported type"); + } +} + +std::vector getControlCharacters() { + static std::vector controlCharacters = { + "\x00", "\x01", "\x02", "\x03", "\x04", "\x05", "\x06", + "\x07", "\x08", "\x09", "\x0A", "\x0B", "\x0C", "\x0D", + "\x0E", "\x0F", "\x10", "\x11", "\x12", "\x13", "\x14", + "\x15", "\x16", "\x17", "\x18", "\x19", "\x1A", "\x1B", + "\x1C", "\x1D", "\x1E", "\x1F", "\x20", "\x7F", "\u0080", + "\u0081", "\u0082", 
"\u0083", "\u0084", "\u0085", "\u0086", "\u0087", + "\u0088", "\u0089", "\u008A", "\u008B", "\u008C", "\u008D", "\u008E", + "\u008F", "\u0090", "\u0091", "\u0092", "\u0093", "\u0094", "\u0095", + "\u0096", "\u0097", "\u0098", "\u0099", "\u009A", "\u009B", "\u009C", + "\u009D", "\u009E", "\u009F"}; + return controlCharacters; +}; + +void JsonInputGenerator::makeRandomVariation(std::string json) { + if (coinToss(rng_, 0.1)) { + const auto controlCharacters = getControlCharacters(); + const auto index = rand(rng_, 0, controlCharacters.size() - 1); + const auto controlCharacter = controlCharacters[index]; + const auto indexToInsert = rand(rng_, 0, json.size()); + json.insert(indexToInsert, controlCharacter); + } else if (coinToss(rng_, 0.1)) { + const auto size = rand(rng_, 0, json.size()); + json.resize(size); + } +} + +// Utility functions +template +std::unique_ptr getRandomInputGeneratorPrimitive( + size_t seed, + const TypePtr& type) { + using T = typename TypeTraits::NativeType; + std::unique_ptr generator = + std::make_unique>(seed, type); + return generator; +} + +std::unique_ptr getRandomInputGenerator( + size_t seed, + const TypePtr& type) { + std::unique_ptr generator; + if (type->isPrimitiveType()) { + return VELOX_DYNAMIC_SCALAR_TEMPLATE_TYPE_DISPATCH( + getRandomInputGeneratorPrimitive, false, type->kind(), seed, type); + } else if (type->isArray()) { + generator = std::make_unique>(seed, type); + } else if (type->isMap()) { + generator = std::make_unique>(seed, type); + + } else if (type->isRow()) { + generator = std::make_unique>( + seed, type, std::vector>{}); + } + return generator; +} + +} // namespace facebook::velox::fuzzer diff --git a/velox/experimental/fuzzer_input_generator/ConstrainedGenerators.h b/velox/experimental/fuzzer_input_generator/ConstrainedGenerators.h new file mode 100644 index 0000000000000..d4a018e9c1fe5 --- /dev/null +++ b/velox/experimental/fuzzer_input_generator/ConstrainedGenerators.h @@ -0,0 +1,330 @@ +/* + * Copyright (c) 
Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include "folly/json.h" + +#include "velox/type/Type.h" +#include "velox/type/Variant.h" +#include "velox/vector/fuzzer/Utils.h" + +namespace facebook::velox::fuzzer { + +using facebook::velox::variant; + +class AbstractInputGenerator { + public: + AbstractInputGenerator( + size_t seed, + const TypePtr& type, + std::unique_ptr&& next); + + virtual ~AbstractInputGenerator() = default; + + virtual variant generate() = 0; + + TypePtr type() const { + return type_; + } + + protected: + FuzzerGenerator rng_; + + TypePtr type_; + + std::unique_ptr next_; +}; + +/// Generates a random string (string size and encoding are passed through +/// Options). 
+std::string randString( + FuzzerGenerator& rng, + size_t length, + UTF8CharList encoding, + std::wstring_convert, char16_t>& converter); + +std::unique_ptr getRandomInputGenerator( + size_t seed, + const TypePtr& type); + +template +class RandomInputGenerator : public AbstractInputGenerator { + public: + RandomInputGenerator(size_t seed, const TypePtr& type) + : AbstractInputGenerator(seed, type, nullptr) {} + + ~RandomInputGenerator() override = default; + + variant generate() override { + if (type_->isDate()) { + return variant(randDate(rng_)); + } + return variant(rand(rng_)); + } +}; + +template +class RandomInputGenerator>> + : public AbstractInputGenerator { + public: + RandomInputGenerator>>( + size_t seed, + const TypePtr& type, + size_t maxLength = 20) + : AbstractInputGenerator(seed, type, nullptr), maxLength_{maxLength} {} + + ~RandomInputGenerator>>() + override = default; + + variant generate() override { + const auto length = rand(rng_, 0, maxLength_); + std::wstring_convert, char16_t> converter; + return variant(randString(rng_, length, encoding_, converter)); + } + + variant generate(size_t length, UTF8CharList encoding) { + std::wstring_convert, char16_t> converter; + return variant(randString(rng_, length, encoding, converter)); + } + + private: + const size_t maxLength_; + + UTF8CharList encoding_ = UTF8CharList::ASCII; +}; + +template +class RandomInputGenerator>> + : public AbstractInputGenerator { + public: + RandomInputGenerator>>( + size_t seed, + const TypePtr& type, + size_t maxLength = 10, + std::unique_ptr&& elementGenerator = nullptr, + std::optional containAtIndex = std::nullopt, + std::unique_ptr&& containGenerator = nullptr) + : AbstractInputGenerator(seed, type, nullptr), + maxLength_{maxLength}, + elementGenerator_{ + elementGenerator ? 
std::move(elementGenerator) + : getRandomInputGenerator(seed, type->childAt(0))}, + containAtIndex_{containAtIndex}, + containGenerator_{std::move(containGenerator)} {} + + ~RandomInputGenerator>>() + override = default; + + variant generate() override { + const auto length = rand(rng_, 0, maxLength_); + std::vector elements; + elements.reserve(length); + for (size_t i = 0; i < length; ++i) { + if UNLIKELY (containAtIndex_.has_value() && *containAtIndex_ == i) { + elements.push_back(containGenerator_->generate()); + } else { + elements.push_back(elementGenerator_->generate()); + } + } + return variant::array(elements); + } + + private: + const size_t maxLength_; + + std::unique_ptr elementGenerator_; + + std::optional containAtIndex_; + + std::unique_ptr containGenerator_; +}; + +template +class RandomInputGenerator>> + : public AbstractInputGenerator { + public: + RandomInputGenerator>>( + size_t seed, + const TypePtr& type, + size_t maxLength = 10, + std::unique_ptr&& keyGenerator = nullptr, + std::unique_ptr&& valueGenerator = nullptr, + std::optional containAtIndex = std::nullopt, + std::unique_ptr&& containKeyGenerator = nullptr, + std::unique_ptr&& containValueGenerator = nullptr) + : AbstractInputGenerator(seed, type, nullptr), + maxLength_{maxLength}, + keyGenerator_{ + keyGenerator ? std::move(keyGenerator) + : getRandomInputGenerator(seed, type->childAt(0))}, + valueGenerator_{ + valueGenerator ? 
std::move(valueGenerator) + : getRandomInputGenerator(seed, type->childAt(1))}, + containAtIndex_{containAtIndex}, + containKeyGenerator_{std::move(containKeyGenerator)}, + containValueGenerator_{std::move(containValueGenerator)} {} + + ~RandomInputGenerator>>() + override = default; + + variant generate() override { + const auto length = rand(rng_, 0, maxLength_); + std::map map; + for (size_t i = 0; i < length; ++i) { + if UNLIKELY (containAtIndex_.has_value() && *containAtIndex_ == i) { + map.emplace( + containKeyGenerator_->generate(), + containValueGenerator_->generate()); + } else { + map.emplace(keyGenerator_->generate(), valueGenerator_->generate()); + } + } + return variant::map(map); + } + + private: + const size_t maxLength_; + + std::unique_ptr keyGenerator_; + + std::unique_ptr valueGenerator_; + + std::optional containAtIndex_; + + std::unique_ptr containKeyGenerator_; + + std::unique_ptr containValueGenerator_; +}; + +template +class RandomInputGenerator>> + : public AbstractInputGenerator { + public: + RandomInputGenerator>>( + size_t seed, + const TypePtr& type, + std::vector> fieldGenerators) + : AbstractInputGenerator(seed, type, nullptr) { + const auto length = type->size(); + fieldGenerators_ = std::move(fieldGenerators); + for (size_t i = 0; i < length; ++i) { + if (fieldGenerators_.size() <= i) { + fieldGenerators_.push_back( + getRandomInputGenerator(seed, type->childAt(i))); + } else if (fieldGenerators_[i] == nullptr) { + fieldGenerators_[i] = getRandomInputGenerator(seed, type->childAt(i)); + } + } + } + + ~RandomInputGenerator>>() + override = default; + + variant generate() override { + const auto length = type_->size(); + std::vector fields; + fields.reserve(length); + for (size_t i = 0; i < length; ++i) { + fields.push_back(fieldGenerators_[i]->generate()); + } + return variant::row(fields); + } + + private: + std::vector> fieldGenerators_; +}; + +class NotEqualConstrainedGenerator : public AbstractInputGenerator { + public: + 
NotEqualConstrainedGenerator( + size_t seed, + const TypePtr& type, + const variant& excludedValue, + std::unique_ptr&& next) + : AbstractInputGenerator(seed, type, std::move(next)), + excludedValue_{excludedValue} {} + + ~NotEqualConstrainedGenerator() override = default; + + variant generate() override; + + private: + variant excludedValue_; +}; + +class SetConstrainedGenerator : public AbstractInputGenerator { + public: + SetConstrainedGenerator( + size_t seed, + const TypePtr& type, + const std::vector& set) + : AbstractInputGenerator(seed, type, nullptr), set_{set} {} + + ~SetConstrainedGenerator() override = default; + + variant generate() override; + + private: + std::vector set_; +}; + +class JsonInputGenerator : public AbstractInputGenerator { + public: + JsonInputGenerator( + size_t seed, + const TypePtr& type, + std::unique_ptr&& objectGenerator, + bool makeRandomVariation = false) + : AbstractInputGenerator(seed, type, nullptr), + objectGenerator_{std::move(objectGenerator)}, + makeRandomVariation_{makeRandomVariation}, + opts_{getSerializationOptions()} {} + + ~JsonInputGenerator() override = default; + + variant generate() override; + + const folly::json::serialization_opts& serializationOptions() const { + return opts_; + } + + private: + template + folly::dynamic convertVariantToDynamicPrimitive(const variant& v) { + using T = typename TypeTraits::DeepCopiedType; + VELOX_CHECK(v.isSet()); + const T value = v.value(); + return folly::dynamic(value); + } + + folly::dynamic convertVariantToDynamic(const variant& object); + + void makeRandomVariation(std::string json); + + folly::json::serialization_opts getSerializationOptions(); + + std::unique_ptr objectGenerator_; + + bool makeRandomVariation_; + + folly::json::serialization_opts opts_; +}; + +} // namespace facebook::velox::fuzzer diff --git a/velox/experimental/fuzzer_input_generator/ConstrainedVectorGenerator.cpp b/velox/experimental/fuzzer_input_generator/ConstrainedVectorGenerator.cpp new file 
mode 100644
index 0000000000000..20797769ea766
--- /dev/null
+++ b/velox/experimental/fuzzer_input_generator/ConstrainedVectorGenerator.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "velox/experimental/fuzzer_input_generator/ConstrainedVectorGenerator.h"
+
+#include "velox/expression/VectorWriters.h"
+
+namespace facebook::velox::fuzzer {
+
+using exec::GenericWriter;
+using exec::VectorWriter;
+
+// static
+// Draws a single value from 'customGenerator' and wraps it in a constant
+// vector of 'size' rows. The generator must be non-null and of primitive type.
+VectorPtr ConstrainedVectorGenerator::generateConstant(
+    const std::shared_ptr& customGenerator,
+    vector_size_t size,
+    memory::MemoryPool* pool) {
+  VELOX_CHECK_NOT_NULL(customGenerator);
+  VELOX_CHECK(customGenerator->type()->isPrimitiveType());
+
+  const auto& type = customGenerator->type();
+  const auto variant = customGenerator->generate();
+
+  return BaseVector::createConstant(type, variant, size, pool);
+}
+
+// writeOne() copies one non-null variant into a GenericWriter. The
+// specializations are declared up front because the array, map and row
+// versions dispatch back into writeOne() for their children.
+// NOTE(review): the template arguments of the specializations are missing in
+// this copy of the patch; which kind each one handles is inferred below from
+// the variant accessor it uses.
+template
+void writeOne(const variant& value, GenericWriter& writer);
+template <>
+void writeOne(const variant& value, GenericWriter& writer);
+template <>
+void writeOne(const variant& value, GenericWriter& writer);
+template <>
+void writeOne(const variant& value, GenericWriter& writer);
+template <>
+void writeOne(const variant& value, GenericWriter& writer);
+template <>
+void writeOne(const variant& value, GenericWriter& writer);
+
+// Generic case: casts the writer to the kind's native type and assigns the
+// variant's value.
+template
+void writeOne(const variant& value, GenericWriter& writer) {
+  using T = typename TypeKindTraits::NativeType;
+  writer.template castTo() = value.value();
+}
+
+// Presumably one of the string-like kinds (VARCHAR / VARBINARY) -- TODO
+// confirm against the original patch.
+template <>
+void writeOne(const variant& value, GenericWriter& writer) {
+  writer.template castTo() = value.value();
+}
+
+// Presumably the other string-like kind -- TODO confirm.
+template <>
+void writeOne(
+    const variant& value,
+    GenericWriter& writer) {
+  writer.template castTo() = value.value();
+}
+
+// Array case (reads value.array()): appends each element, writing nulls with
+// add_null() and non-null elements recursively through add_item().
+template <>
+void writeOne(const variant& value, GenericWriter& writer) {
+  auto& writerTyped = writer.template castTo>();
+  const auto& elements = value.array();
+  for (const auto& element : elements) {
+    if (element.isNull()) {
+      writerTyped.add_null();
+    } else {
+      VELOX_DYNAMIC_TYPE_DISPATCH(
+          writeOne, element.kind(), element, writerTyped.add_item());
+    }
+  }
+}
+
+// Map case (reads value.map()): keys must be non-null. For a null value the
+// key is written into the writer returned by add_null(); otherwise key and
+// value are written into the pair of writers returned by add_item().
+template <>
+void writeOne(const variant& value, GenericWriter& writer) {
+  auto& writerTyped = writer.template castTo>();
+  const auto& map = value.map();
+  for (const auto& pair : map) {
+    const auto& key = pair.first;
+    // Shadows the function parameter 'value' for the rest of the loop body.
+    const auto& value = pair.second;
+    VELOX_CHECK(!key.isNull());
+    if (value.isNull()) {
+      VELOX_DYNAMIC_TYPE_DISPATCH(
+          writeOne, key.kind(), key, writerTyped.add_null());
+    } else {
+      auto writers = writerTyped.add_item();
+      VELOX_DYNAMIC_TYPE_DISPATCH(
+          writeOne, key.kind(), key, std::get<0>(writers));
+      VELOX_DYNAMIC_TYPE_DISPATCH(
+          writeOne, value.kind(), value, std::get<1>(writers));
+    }
+  }
+}
+
+// Row case (reads value.row()): writes field i through get_writer_at(i), or
+// marks it null with set_null_at(i).
+template <>
+void writeOne(const variant& value, GenericWriter& writer) {
+  auto& writerTyped = writer.template castTo();
+  const auto& elements = value.row();
+  column_index_t i = 0;
+  for (const auto& element : elements) {
+    if (element.isNull()) {
+      writerTyped.set_null_at(i);
+    } else {
+      VELOX_DYNAMIC_TYPE_DISPATCH(
+          writeOne, element.kind(), element, writerTyped.get_writer_at(i));
+    }
+    i++;
+  }
+}
+
+// static
+// Builds a vector of 'size' rows of the generator's type, filling every row
+// with a freshly generated value (null variants become null rows).
+// NOTE(review): writer.finish() is never called after the loop -- confirm
+// VectorWriter does not need it to finalize child vectors of complex types.
+VectorPtr ConstrainedVectorGenerator::generateFlat(
+    const std::shared_ptr& customGenerator,
+    vector_size_t size,
+    memory::MemoryPool* pool) {
+  VELOX_CHECK_NOT_NULL(customGenerator);
+
+  VectorPtr result;
+  const auto& type = customGenerator->type();
+  BaseVector::ensureWritable(SelectivityVector(size), type, pool, result);
+  VectorWriter writer;
+  writer.init(*result);
+
+  for (auto i = 0; i < size; ++i) {
+    writer.setOffset(i);
+    const auto variant = customGenerator->generate();
+    if (variant.isNull()) {
+      writer.commitNull();
+    } else {
+      VELOX_DYNAMIC_TYPE_DISPATCH(
+          writeOne, type->kind(), variant, writer.current());
+      writer.commit(true);
+    }
+  }
+  return result;
+}
+
+} // namespace facebook::velox::fuzzer
diff --git a/velox/experimental/fuzzer_input_generator/ConstrainedVectorGenerator.h b/velox/experimental/fuzzer_input_generator/ConstrainedVectorGenerator.h
new file mode 100644
index 0000000000000..555b905f46b0a
--- /dev/null
+++ b/velox/experimental/fuzzer_input_generator/ConstrainedVectorGenerator.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "velox/experimental/fuzzer_input_generator/ConstrainedGenerators.h"
+
+namespace facebook::velox::fuzzer {
+
+/// Static-only helper (the constructor is deleted) that turns the values of an
+/// input generator into Velox vectors.
+class ConstrainedVectorGenerator {
+ public:
+  ConstrainedVectorGenerator() = delete;
+
+  /// Returns a constant vector of 'size' rows holding a single value drawn
+  /// from 'customGenerator', which must be non-null and of primitive type.
+  static VectorPtr generateConstant(
+      const std::shared_ptr& customGenerator,
+      vector_size_t size,
+      memory::MemoryPool* pool);
+
+  /// Returns a vector of 'size' rows, each filled with a separately generated
+  /// value from 'customGenerator', which must be non-null.
+  static VectorPtr generateFlat(
+      const std::shared_ptr& customGenerator,
+      vector_size_t size,
+      memory::MemoryPool* pool);
+};
+
+} // namespace facebook::velox::fuzzer
diff --git a/velox/experimental/fuzzer_input_generator/tests/CMakeLists.txt b/velox/experimental/fuzzer_input_generator/tests/CMakeLists.txt
new file mode 100644
index 0000000000000..3f781a64aaf3b
--- /dev/null
+++ b/velox/experimental/fuzzer_input_generator/tests/CMakeLists.txt
@@ -0,0 +1,28 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +add_executable(velox_fuzzer_constrained_input_generators_test + ConstrainedGeneratorsTest.cpp) + +add_test( + NAME velox_fuzzer_constrained_input_generators_test + COMMAND velox_fuzzer_constrained_input_generators_test + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + +target_link_libraries( + velox_fuzzer_constrained_input_generators_test + velox_fuzzer_constrained_input_generators + velox_presto_types + velox_type + velox_vector_test_lib) diff --git a/velox/experimental/fuzzer_input_generator/tests/ConstrainedGeneratorsTest.cpp b/velox/experimental/fuzzer_input_generator/tests/ConstrainedGeneratorsTest.cpp new file mode 100644 index 0000000000000..1016da37179d9 --- /dev/null +++ b/velox/experimental/fuzzer_input_generator/tests/ConstrainedGeneratorsTest.cpp @@ -0,0 +1,260 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/experimental/fuzzer_input_generator/ConstrainedGenerators.h" + +#include + +#include "velox/experimental/fuzzer_input_generator/ConstrainedVectorGenerator.h" +#include "velox/functions/prestosql/types/JsonType.h" +#include "velox/type/Variant.h" +#include "velox/vector/tests/utils/VectorTestBase.h" + +namespace facebook::velox::fuzzer::test { + +class ConstrainedGeneratorsTest : public testing::Test, + public velox::test::VectorTestBase { + protected: + static void SetUpTestCase() { + memory::MemoryManager::testingSetInstance({}); + } + + template + void testRandomPrimitive(const TypePtr& type) { + VELOX_CHECK_EQ(type->kind(), KIND); + using T = typename TypeTraits::NativeType; + + std::unique_ptr generator = + std::make_unique>(0, type); + auto value = generator->generate(); + EXPECT_TRUE(value.hasValue()); + EXPECT_EQ(value.kind(), KIND); + } + + template + void testRandomComplex(const TypePtr& type) { + VELOX_CHECK_EQ(type->kind(), KIND); + using T = typename TypeTraits::ImplType; + + std::unique_ptr generator = + std::make_unique>(0, type); + auto value = generator->generate(); + EXPECT_TRUE(value.hasValue()); + EXPECT_EQ(value.kind(), KIND); // TODO: check type recursive + } + + template + void testNotEqualPrimitive(const TypePtr& type, const TValue& excludedValue) { + VELOX_CHECK_EQ(type->kind(), KIND); + using T = typename TypeTraits::NativeType; + + variant excludedVariant{excludedValue}; + std::unique_ptr generator = + std::make_unique( + 0, + type, + excludedVariant, + std::make_unique>(0, type)); + + const uint32_t kIterations = 1000; + for (uint32_t i = 0; i < kIterations; ++i) { + auto value = generator->generate(); + EXPECT_TRUE(value.hasValue()); + EXPECT_EQ(value.kind(), KIND); + EXPECT_NE(value, excludedVariant); + } + } + + template + void testNotEqualComplex( + const TypePtr& type, + const variant& excludedVariant) { + VELOX_CHECK_EQ(type->kind(), KIND); + using T = typename TypeTraits::ImplType; + + std::unique_ptr 
generator = + std::make_unique( + 0, + type, + excludedVariant, + std::make_unique>(0, type)); + + const uint32_t kIterations = 1000; + for (uint32_t i = 0; i < kIterations; ++i) { + auto value = generator->generate(); + EXPECT_TRUE(value.hasValue()); + EXPECT_EQ(value.kind(), KIND); // todo: check type recursive + EXPECT_NE(value, excludedVariant); + } + } + + template + void testSetPrimitive(const TypePtr& type, const TSet& setOfRawValues) { + VELOX_CHECK_EQ(type->kind(), KIND); + using T = typename TypeTraits::NativeType; + + const uint32_t kIterations = 1000; + std::vector variants; + for (const auto& value : setOfRawValues) { + variants.push_back(variant{value}); + } + std::unique_ptr generator = + std::make_unique(0, type, variants); + + for (uint32_t i = 0; i < kIterations; ++i) { + auto value = generator->generate(); + EXPECT_TRUE(value.hasValue()); + EXPECT_EQ(value.kind(), KIND); + EXPECT_NE(setOfRawValues.count(value.value()), 0); + } + } + + template + void testSetComplex( + const TypePtr& type, + const std::vector& variants) { + VELOX_CHECK_EQ(type->kind(), KIND); + using T = typename TypeTraits::ImplType; + + std::set setOfVariants{variants.begin(), variants.end()}; + + std::unique_ptr generator = + std::make_unique(0, type, variants); + + const uint32_t kIterations = 1000; + for (uint32_t i = 0; i < kIterations; ++i) { + auto value = generator->generate(); + EXPECT_TRUE(value.hasValue()); + EXPECT_EQ(value.kind(), KIND); // todo: check type recursive + EXPECT_NE(setOfVariants.count(value), 0); + } + } + + template + void testGenerateVectorsPrimitive( + const TypePtr& type, + const variant& excludedValue) { + using T = typename TypeTraits::NativeType; + const uint32_t kSize = 1000; + std::shared_ptr generator = + std::make_shared( + 0, + type, + excludedValue, + std::make_unique>(0, type)); + auto vector = + ConstrainedVectorGenerator::generateConstant(generator, kSize, pool()); + EXPECT_EQ(vector->size(), kSize); + EXPECT_EQ(vector->typeKind(), 
KIND); + EXPECT_TRUE(vector->isConstantEncoding()); + EXPECT_NE(vector->as>()->valueAt(0), excludedValue); + + vector = ConstrainedVectorGenerator::generateFlat(generator, kSize, pool()); + EXPECT_EQ(vector->size(), kSize); + EXPECT_EQ(vector->typeKind(), KIND); + EXPECT_TRUE(vector->isFlatEncoding()); + for (auto i = 0; i < kSize; ++i) { + EXPECT_NE(vector->as>()->valueAt(i), excludedValue); + } + } + + template + void testGenerateVectorsComplex(const TypePtr& type) { + using T = typename TypeTraits::ImplType; + const uint32_t kSize = 1000; + std::shared_ptr generator = + std::make_shared>(0, type); + auto vector = + ConstrainedVectorGenerator::generateFlat(generator, kSize, pool()); + EXPECT_EQ(vector->size(), kSize); + EXPECT_EQ(vector->type(), type); + } +}; + +TEST_F(ConstrainedGeneratorsTest, randomPrimitive) { + testRandomPrimitive(INTEGER()); + + testRandomPrimitive(VARCHAR()); +} + +TEST_F(ConstrainedGeneratorsTest, randomComplex) { + testRandomComplex(ARRAY(MAP(VARCHAR(), ROW({BIGINT()})))); +} + +TEST_F(ConstrainedGeneratorsTest, notEqPrimitive) { + testNotEqualPrimitive(TINYINT(), static_cast(1)); + + testNotEqualPrimitive(VARCHAR(), ""_sv); +} + +TEST_F(ConstrainedGeneratorsTest, notEqComplex) { + auto excludedVariant = variant::array({variant::map( + {{variant{"1"}, variant::row({variant{static_cast(1)}})}})}); + testNotEqualComplex( + ARRAY(MAP(VARCHAR(), ROW({BIGINT()}))), excludedVariant); +} + +TEST_F(ConstrainedGeneratorsTest, setPrimitive) { + std::unordered_set integers{{1, 2, 3}}; + testSetPrimitive(INTEGER(), integers); + + std::unordered_set strings{{"1", "2", "3"}}; + testSetPrimitive(VARCHAR(), strings); +} + +TEST_F(ConstrainedGeneratorsTest, setComplex) { + std::vector variants{ + variant::array({variant::map( + {{variant{"1"}, variant::row({variant{static_cast(1)}})}})}), + variant::array({variant::map( + {{variant{"2"}, + variant::row({variant{static_cast(2)}})}})})}; + testSetComplex( + ARRAY(MAP(VARCHAR(), ROW({BIGINT()}))), 
variants); +} + +TEST_F(ConstrainedGeneratorsTest, json) { + const TypePtr type = ARRAY(MAP(DOUBLE(), ROW({BIGINT()}))); + std::unique_ptr generator = + std::make_unique( + 0, + JSON(), + std::make_unique>(0, type)); + + const uint32_t kIterations = 1000; + const auto& opts = generator->serializationOptions(); + for (uint32_t i = 0; i < kIterations; ++i) { + auto value = generator->generate(); + EXPECT_TRUE(value.hasValue()); + EXPECT_EQ(value.kind(), TypeKind::VARCHAR); + folly::dynamic json; + EXPECT_NO_THROW( + json = folly::parseJson(value.value(), opts)); + EXPECT_TRUE(json.isArray()); + } +} + +TEST_F(ConstrainedGeneratorsTest, generateVectors) { + testGenerateVectorsPrimitive(BIGINT(), variant(0)); + testGenerateVectorsPrimitive(VARCHAR(), variant("")); + + testGenerateVectorsComplex( + ARRAY(ROW({MAP(VARCHAR(), BIGINT())}))); + testGenerateVectorsComplex( + MAP(ARRAY(BIGINT()), ROW({VARCHAR()}))); +} + +} // namespace facebook::velox::fuzzer::test diff --git a/velox/vector/fuzzer/Utils.cpp b/velox/vector/fuzzer/Utils.cpp index 805fcf1063d73..f8bab45af6940 100644 --- a/velox/vector/fuzzer/Utils.cpp +++ b/velox/vector/fuzzer/Utils.cpp @@ -16,13 +16,53 @@ #include "velox/vector/fuzzer/Utils.h" -namespace facebook::velox::generator_spec_utils { +namespace facebook::velox { bool coinToss(FuzzerGenerator& rng, double threshold) { static std::uniform_real_distribution<> dist(0.0, 1.0); return dist(rng) < threshold; } +Timestamp randTimestamp( + FuzzerGenerator& rng, + FuzzerTimestampPrecision timestampPrecision) { + // Generate timestamps only in the valid range to avoid datetime functions, + // such as try_cast(varchar as timestamp), throwing VeloxRuntimeError in + // fuzzers. + constexpr int64_t min = -2'140'671'600; + constexpr int64_t max = 2'140'671'600; + constexpr int64_t microInSecond = 1'000'000; + constexpr int64_t millisInSecond = 1'000; + // DWRF requires nano to be in a certain range. Hardcode the value here to + // avoid the dependency on DWRF. 
+ constexpr int64_t MAX_NANOS = 1'000'000'000; + + switch (timestampPrecision) { + case FuzzerTimestampPrecision::kNanoSeconds: + return Timestamp( + rand(rng, min, max), (rand(rng) % MAX_NANOS)); + case FuzzerTimestampPrecision::kMicroSeconds: + return Timestamp::fromMicros( + rand(rng, min, max) * microInSecond + + rand(rng, -microInSecond, microInSecond)); + case FuzzerTimestampPrecision::kMilliSeconds: + return Timestamp::fromMillis( + rand(rng, min, max) * millisInSecond + + rand(rng, -millisInSecond, millisInSecond)); + case FuzzerTimestampPrecision::kSeconds: + return Timestamp(rand(rng, min, max), 0); + } + return {}; // no-op. +} + +int32_t randDate(FuzzerGenerator& rng) { + constexpr int64_t min = -24'450; + constexpr int64_t max = 24'450; + return rand(rng, min, max); +} + +namespace generator_spec_utils { + vector_size_t getRandomIndex(FuzzerGenerator& rng, vector_size_t maxIndex) { std::uniform_int_distribution indexGenerator( 0, maxIndex); // generates index in [0, maxIndex] @@ -59,4 +99,6 @@ BufferPtr generateIndicesBuffer( return indices; } -} // namespace facebook::velox::generator_spec_utils +} // namespace generator_spec_utils + +} // namespace facebook::velox diff --git a/velox/vector/fuzzer/Utils.h b/velox/vector/fuzzer/Utils.h index 0248b08f942fa..fbbed12a0b102 100644 --- a/velox/vector/fuzzer/Utils.h +++ b/velox/vector/fuzzer/Utils.h @@ -16,6 +16,11 @@ #pragma once +#include + +#include +#include + #include "velox/vector/BaseVector.h" #include "velox/vector/NullsBuilder.h" @@ -23,6 +28,57 @@ namespace facebook::velox { using FuzzerGenerator = std::mt19937; +enum UTF8CharList { + ASCII = 0, // Ascii character set. + UNICODE_CASE_SENSITIVE = 1, // Unicode scripts that support case. + EXTENDED_UNICODE = 2, // Extended Unicode: Arabic, Devanagiri etc + MATHEMATICAL_SYMBOLS = 3 // Mathematical Symbols. +}; + +/// Unicode character ranges. Ensure the vector indexes match the UTF8CharList +/// enum values. 
+///
+/// Source: https://jrgraphix.net/research/unicode_blocks.php
+static const std::vector<std::vector<std::pair<char16_t, char16_t>>>
+    kUTFChatSets{
+        // UTF8CharList::ASCII
+        {
+            {33, 127}, // All ASCII printable chars.
+        },
+        // UTF8CharList::UNICODE_CASE_SENSITIVE
+        {
+            {u'\u0020', u'\u007F'}, // Basic Latin.
+            {u'\u0400', u'\u04FF'}, // Cyrillic.
+        },
+        // UTF8CharList::EXTENDED_UNICODE
+        {
+            {u'\u03F0', u'\u03FF'}, // Greek.
+            {u'\u0100', u'\u017F'}, // Latin Extended A.
+            {u'\u0600', u'\u06FF'}, // Arabic.
+            {u'\u0900', u'\u097F'}, // Devanagari.
+            {u'\u0590', u'\u05FF'}, // Hebrew (was a duplicate of Arabic).
+            {u'\u3040', u'\u309F'}, // Hiragana.
+            {u'\u2000', u'\u206F'}, // Punctuation.
+            {u'\u2070', u'\u209F'}, // Sub/Super Script.
+            {u'\u20A0', u'\u20CF'}, // Currency.
+        },
+        // UTF8CharList::MATHEMATICAL_SYMBOLS
+        {
+            {u'\u2200', u'\u22FF'}, // Math Operators.
+            {u'\u2150', u'\u218F'}, // Number Forms.
+            {u'\u25A0', u'\u25FF'}, // Geometric Shapes.
+            {u'\u27C0', u'\u27EF'}, // Math Symbols.
+            {u'\u2A00', u'\u2AFF'}, // Supplemental.
+        },
+    };
+
+bool coinToss(FuzzerGenerator& rng, double threshold);
+
+struct DataSpec {
+  bool includeNaN;
+  bool includeInfinity;
+};
+
 enum class FuzzerTimestampPrecision : int8_t {
   kNanoSeconds = 0,
   kMicroSeconds = 1,
@@ -30,9 +86,96 @@ enum class FuzzerTimestampPrecision : int8_t {
   kSeconds = 3,
 };
 
-namespace generator_spec_utils {
+// Generate random values for the different supported types.
+template +inline T rand(FuzzerGenerator& rng, DataSpec dataSpec = {false, false}) { + VELOX_NYI(); +} -bool coinToss(FuzzerGenerator& rng, double threshold); +template <> +inline int8_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution()(rng); +} + +template <> +inline int16_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution()(rng); +} + +template <> +inline int32_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution()(rng); +} + +template <> +inline int64_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution()(rng); +} + +template <> +inline double rand(FuzzerGenerator& rng, DataSpec dataSpec) { + if (dataSpec.includeNaN && coinToss(rng, 0.05)) { + return std::nan(""); + } + + if (dataSpec.includeInfinity && coinToss(rng, 0.05)) { + return std::numeric_limits::infinity(); + } + + return boost::random::uniform_01()(rng); +} + +template <> +inline float rand(FuzzerGenerator& rng, DataSpec dataSpec) { + if (dataSpec.includeNaN && coinToss(rng, 0.05)) { + return std::nanf(""); + } + + if (dataSpec.includeInfinity && coinToss(rng, 0.05)) { + return std::numeric_limits::infinity(); + } + + return boost::random::uniform_01()(rng); +} + +template <> +inline bool rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution(0, 1)(rng); +} + +template <> +inline uint32_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution()(rng); +} + +template <> +inline uint64_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution()(rng); +} + +template <> +inline int128_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return HugeInt::build(rand(rng), rand(rng)); +} + +Timestamp randTimestamp( + FuzzerGenerator& rng, + FuzzerTimestampPrecision 
timestampPrecision); + +template <> +inline Timestamp rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + // TODO: support other timestamp precisions. + return randTimestamp(rng, FuzzerTimestampPrecision::kMicroSeconds); +} + +int32_t randDate(FuzzerGenerator& rng); + +template , int> = 0> +inline T rand(FuzzerGenerator& rng, T min, T max) { + return boost::random::uniform_int_distribution(min, max)(rng); +} + +namespace generator_spec_utils { vector_size_t getRandomIndex(FuzzerGenerator& rng, vector_size_t maxIndex); diff --git a/velox/vector/fuzzer/VectorFuzzer.cpp b/velox/vector/fuzzer/VectorFuzzer.cpp index 9797bc56632a7..cc8f793d28e04 100644 --- a/velox/vector/fuzzer/VectorFuzzer.cpp +++ b/velox/vector/fuzzer/VectorFuzzer.cpp @@ -27,15 +27,12 @@ #include "velox/vector/FlatVector.h" #include "velox/vector/NullsBuilder.h" #include "velox/vector/VectorTypeUtils.h" +#include "velox/vector/fuzzer/Utils.h" namespace facebook::velox { namespace { -// DWRF requires nano to be in a certain range. Hardcode the value here to avoid -// the dependency on DWRF. -constexpr int64_t MAX_NANOS = 1'000'000'000; - // Structure to help temporary changes to Options. This objects saves the // current state of the Options object, and restores it when it's destructed. // For instance, if you would like to temporarily disable nulls for a particular @@ -62,116 +59,6 @@ struct ScopedOptions { VectorFuzzer::Options savedOpts; }; -// Generate random values for the different supported types. 
-template -T rand(FuzzerGenerator& rng, DataSpec dataSpec = {false, false}) { - VELOX_NYI(); -} - -template <> -int8_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution()(rng); -} - -template <> -int16_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution()(rng); -} - -template <> -int32_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution()(rng); -} - -template <> -int64_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution()(rng); -} - -template <> -double rand(FuzzerGenerator& rng, DataSpec dataSpec) { - if (dataSpec.includeNaN && coinToss(rng, 0.05)) { - return std::nan(""); - } - - if (dataSpec.includeInfinity && coinToss(rng, 0.05)) { - return std::numeric_limits::infinity(); - } - - return boost::random::uniform_01()(rng); -} - -template <> -float rand(FuzzerGenerator& rng, DataSpec dataSpec) { - if (dataSpec.includeNaN && coinToss(rng, 0.05)) { - return std::nanf(""); - } - - if (dataSpec.includeInfinity && coinToss(rng, 0.05)) { - return std::numeric_limits::infinity(); - } - - return boost::random::uniform_01()(rng); -} - -template <> -bool rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution(0, 1)(rng); -} - -template <> -uint32_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution()(rng); -} - -template <> -uint64_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution()(rng); -} - -template <> -int128_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return HugeInt::build(rand(rng), rand(rng)); -} - -template , int> = 0> -T rand(FuzzerGenerator& rng, T min, T max) { - return boost::random::uniform_int_distribution(min, max)(rng); -} - -Timestamp randTimestamp(FuzzerGenerator& rng, VectorFuzzer::Options opts) { 
- // Generate timestamps only in the valid range to avoid datetime functions, - // such as try_cast(varchar as timestamp), throwing VeloxRuntimeError in - // fuzzers. - constexpr int64_t min = -2'140'671'600; - constexpr int64_t max = 2'140'671'600; - constexpr int64_t microInSecond = 1'000'000; - constexpr int64_t millisInSecond = 1'000; - - switch (opts.timestampPrecision) { - case FuzzerTimestampPrecision::kNanoSeconds: - return Timestamp( - rand(rng, min, max), (rand(rng) % MAX_NANOS)); - case FuzzerTimestampPrecision::kMicroSeconds: - return Timestamp::fromMicros( - rand(rng, min, max) * microInSecond + - rand(rng, -microInSecond, microInSecond)); - case FuzzerTimestampPrecision::kMilliSeconds: - return Timestamp::fromMillis( - rand(rng, min, max) * millisInSecond + - rand(rng, -millisInSecond, millisInSecond)); - case FuzzerTimestampPrecision::kSeconds: - return Timestamp(rand(rng, min, max), 0); - } - return {}; // no-op. -} - -int32_t randDate(FuzzerGenerator& rng) { - constexpr int64_t min = -24'450; - constexpr int64_t max = 24'450; - return rand(rng, min, max); -} - size_t getElementsVectorLength( const VectorFuzzer::Options& opts, vector_size_t size) { @@ -196,42 +83,6 @@ int128_t randLongDecimal(const TypePtr& type, FuzzerGenerator& rng) { return rand(rng) % DecimalUtil::kPowersOfTen[precision]; } -/// Unicode character ranges. Ensure the vector indexes match the UTF8CharList -/// enum values. -/// -/// Source: https://jrgraphix.net/research/unicode_blocks.php -const std::vector>> kUTFChatSets{ - // UTF8CharList::ASCII - { - {33, 127}, // All ASCII printable chars. - }, - // UTF8CharList::UNICODE_CASE_SENSITIVE - { - {u'\u0020', u'\u007F'}, // Basic Latin. - {u'\u0400', u'\u04FF'}, // Cyrillic. - }, - // UTF8CharList::EXTENDED_UNICODE - { - {u'\u03F0', u'\u03FF'}, // Greek. - {u'\u0100', u'\u017F'}, // Latin Extended A. - {u'\u0600', u'\u06FF'}, // Arabic. - {u'\u0900', u'\u097F'}, // Devanagari. - {u'\u0600', u'\u06FF'}, // Hebrew. 
- {u'\u3040', u'\u309F'}, // Hiragana. - {u'\u2000', u'\u206F'}, // Punctuation. - {u'\u2070', u'\u209F'}, // Sub/Super Script. - {u'\u20A0', u'\u20CF'}, // Currency. - }, - // UTF8CharList::MATHEMATICAL_SYMBOLS - { - {u'\u2200', u'\u22FF'}, // Math Operators. - {u'\u2150', u'\u218F'}, // Number Forms. - {u'\u25A0', u'\u25FF'}, // Geometric Shapes. - {u'\u27C0', u'\u27EF'}, // Math Symbols. - {u'\u2A00', u'\u2AFF'}, // Supplemental. - }, -}; - FOLLY_ALWAYS_INLINE char16_t getRandomChar( FuzzerGenerator& rng, const std::vector>& charSet) { @@ -290,7 +141,7 @@ VectorPtr fuzzConstantPrimitiveImpl( } if constexpr (std::is_same_v) { return std::make_shared>( - pool, size, false, type, randTimestamp(rng, opts)); + pool, size, false, type, randTimestamp(rng, opts.timestampPrecision)); } else if (type->isDate()) { return std::make_shared>( pool, size, false, type, randDate(rng)); @@ -322,7 +173,7 @@ void fuzzFlatPrimitiveImpl( if constexpr (std::is_same_v) { flatVector->set(i, randString(rng, opts, strBuf, converter)); } else if constexpr (std::is_same_v) { - flatVector->set(i, randTimestamp(rng, opts)); + flatVector->set(i, randTimestamp(rng, opts.timestampPrecision)); } else if constexpr (std::is_same_v) { if (vector->type()->isShortDecimal()) { flatVector->set(i, randShortDecimal(vector->type(), rng)); diff --git a/velox/vector/fuzzer/VectorFuzzer.h b/velox/vector/fuzzer/VectorFuzzer.h index 9c5691aa144eb..75254d153c4ea 100644 --- a/velox/vector/fuzzer/VectorFuzzer.h +++ b/velox/vector/fuzzer/VectorFuzzer.h @@ -27,18 +27,6 @@ namespace facebook::velox { -enum UTF8CharList { - ASCII = 0, // Ascii character set. - UNICODE_CASE_SENSITIVE = 1, // Unicode scripts that support case. - EXTENDED_UNICODE = 2, // Extended Unicode: Arabic, Devanagiri etc - MATHEMATICAL_SYMBOLS = 3 // Mathematical Symbols. 
-}; - -struct DataSpec { - bool includeNaN; - bool includeInfinity; -}; - const std::vector& defaultScalarTypes(); /// VectorFuzzer is a helper class that generates randomized vectors and their