diff --git a/velox/CMakeLists.txt b/velox/CMakeLists.txt index 06ae8bf1c0533..c0a6206313893 100644 --- a/velox/CMakeLists.txt +++ b/velox/CMakeLists.txt @@ -25,6 +25,7 @@ add_subdirectory(flag_definitions) add_subdirectory(external/date) add_subdirectory(external/md5) add_subdirectory(external/hdfs) +add_subdirectory(experimental/fuzzer_input_generator) # # examples depend on expression diff --git a/velox/experimental/fuzzer_input_generator/CMakeLists.txt b/velox/experimental/fuzzer_input_generator/CMakeLists.txt new file mode 100644 index 0000000000000..e1a493b85c467 --- /dev/null +++ b/velox/experimental/fuzzer_input_generator/CMakeLists.txt @@ -0,0 +1,30 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +velox_add_library(velox_fuzzer_constrained_input_generators + ConstrainedGenerators.cpp ConstrainedVectorGenerator.cpp) + +velox_link_libraries( + velox_fuzzer_constrained_input_generators + Folly::folly + velox_expression + velox_type + velox_vector_fuzzer_util) +if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + target_compile_options(velox_fuzzer_constrained_input_generators + PRIVATE -Wno-deprecated-declarations) +endif() + +if(${VELOX_BUILD_TESTING}) + add_subdirectory(tests) +endif() diff --git a/velox/experimental/fuzzer_input_generator/ConstrainedGenerators.cpp b/velox/experimental/fuzzer_input_generator/ConstrainedGenerators.cpp new file mode 100644 index 0000000000000..736003a3d2847 --- /dev/null +++ b/velox/experimental/fuzzer_input_generator/ConstrainedGenerators.cpp @@ -0,0 +1,221 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/experimental/fuzzer_input_generator/ConstrainedGenerators.h" + +#include + +#include "velox/vector/fuzzer/Utils.h" + +namespace facebook::velox::fuzzer { + +FOLLY_ALWAYS_INLINE char16_t getRandomChar( + FuzzerGenerator& rng, + const std::vector>& charSet) { + const auto& chars = charSet.size() == 1 + ? charSet.front() + : charSet[rand(rng) % charSet.size()]; + auto size = chars.second - chars.first; + auto inc = (rand(rng) % size); + char16_t res = chars.first + inc; + return res; +} + +/// Generates a random string (string size and encoding are passed through +/// Options). +std::string randString( + FuzzerGenerator& rng, + size_t length, + UTF8CharList encoding, + std::wstring_convert, char16_t>& converter) { + std::string buf; + std::u16string wbuf; + wbuf.resize(length); + + for (size_t i = 0; i < length; ++i) { + wbuf[i] = getRandomChar(rng, kUTFChatSets[encoding]); + } + buf.append(converter.to_bytes(wbuf)); + return buf; +} + +// AbstractInputGenerator +AbstractInputGenerator::AbstractInputGenerator( + size_t seed, + const TypePtr& type, + std::unique_ptr&& next) + : type_{type}, next_{std::move(next)} { + rng_.seed(seed); +} + +// NotEqualConstrainedGenerator +variant NotEqualConstrainedGenerator::generate() { + variant value; + do { + value = next_->generate(); + } while (value == excludedValue_); + return value; +} + +// SetConstrainedGenerator +variant SetConstrainedGenerator::generate() { + const auto index = + boost::random::uniform_int_distribution(0, set_.size() - 1)(rng_); + return set_[index]; +} + +// JsonInputGenerator +folly::json::serialization_opts JsonInputGenerator::getSerializationOptions() { + folly::json::serialization_opts opts; + opts.allow_non_string_keys = true; + opts.allow_nan_inf = true; + if (makeRandomVariation_) { + opts.convert_int_keys = rand(rng_); + opts.pretty_formatting = rand(rng_); + opts.pretty_formatting_indent_width = rand(rng_, 0, 4); + opts.encode_non_ascii = rand(rng_); + opts.allow_trailing_comma = rand(rng_); + opts.sort_keys = rand(rng_); + opts.skip_invalid_utf8 = rand(rng_); + opts.parse_numbers_as_strings = rand(rng_); + } + return opts; +} + +variant JsonInputGenerator::generate() { + const auto object = objectGenerator_->generate(); + const folly::dynamic jsonObject = convertVariantToDynamic(object); + const auto jsonString = folly::json::serialize(jsonObject, opts_); + if (makeRandomVariation_ && coinToss(rng_, 0.5)) { + makeRandomVariation(jsonString); + } + return variant(jsonString); +} + +folly::dynamic JsonInputGenerator::convertVariantToDynamic( + const variant& object) { + if (object.isNull()) { + return folly::dynamic(); + } + + switch (object.kind()) { + case TypeKind::BOOLEAN: + return convertVariantToDynamicPrimitive(object); + case TypeKind::TINYINT: + return convertVariantToDynamicPrimitive(object); + case TypeKind::SMALLINT: + return convertVariantToDynamicPrimitive(object); + case TypeKind::INTEGER: + return convertVariantToDynamicPrimitive(object); + case TypeKind::BIGINT: + return convertVariantToDynamicPrimitive(object); + case TypeKind::REAL: + return convertVariantToDynamicPrimitive(object); + case TypeKind::DOUBLE: + return convertVariantToDynamicPrimitive(object); + case TypeKind::VARCHAR: + return convertVariantToDynamicPrimitive(object); + case TypeKind::VARBINARY: + return convertVariantToDynamicPrimitive(object); + case TypeKind::TIMESTAMP: + return convertVariantToDynamicPrimitive(object); + case TypeKind::HUGEINT: + return convertVariantToDynamicPrimitive(object); + case TypeKind::ARRAY: { + folly::dynamic array = folly::dynamic::array; + for (const auto& element : object.value()) { + array.push_back(convertVariantToDynamic(element)); + } + return array; + } + case TypeKind::MAP: { + folly::dynamic map = folly::dynamic::object; + for (const auto& [key, value] : object.value()) { + map[convertVariantToDynamic(key)] = convertVariantToDynamic(value); + } + return map; + } + case TypeKind::ROW: { + folly::dynamic array = folly::dynamic::array; + for (const auto& element : object.value()) { + array.push_back(convertVariantToDynamic(element)); + } + return array; + } + default: + VELOX_UNREACHABLE("Unsupported type"); + } +} + +std::vector getControlCharacters() { + static std::vector controlCharacters = { + "\x00", "\x01", "\x02", "\x03", "\x04", "\x05", "\x06", + "\x07", "\x08", "\x09", "\x0A", "\x0B", "\x0C", "\x0D", + "\x0E", "\x0F", "\x10", "\x11", "\x12", "\x13", "\x14", + "\x15", "\x16", "\x17", "\x18", "\x19", "\x1A", "\x1B", + "\x1C", "\x1D", "\x1E", "\x1F", "\x20", "\x7F", "\u0080", + "\u0081", "\u0082", "\u0083", "\u0084", "\u0085", "\u0086", "\u0087", + "\u0088", "\u0089", "\u008A", "\u008B", "\u008C", "\u008D", "\u008E", + "\u008F", "\u0090", "\u0091", "\u0092", "\u0093", "\u0094", "\u0095", + "\u0096", "\u0097", "\u0098", "\u0099", "\u009A", "\u009B", "\u009C", + "\u009D", "\u009E", "\u009F"}; + return controlCharacters; +}; + +void JsonInputGenerator::makeRandomVariation(std::string json) { + if (coinToss(rng_, 0.1)) { + const auto controlCharacters = getControlCharacters(); + const auto index = rand(rng_, 0, controlCharacters.size() - 1); + const auto controlCharacter = controlCharacters[index]; + const auto indexToInsert = rand(rng_, 0, json.size()); + json.insert(indexToInsert, controlCharacter); + } else if (coinToss(rng_, 0.1)) { + const auto size = rand(rng_, 0, json.size()); + json.resize(size); + } +} + +// Utility functions +template +std::unique_ptr getRandomInputGeneratorPrimitive( + size_t seed, + const TypePtr& type) { + using T = typename TypeTraits::NativeType; + std::unique_ptr generator = + std::make_unique>(seed, type); + return generator; +} + +std::unique_ptr getRandomInputGenerator( + size_t seed, + const TypePtr& type) { + std::unique_ptr generator; + if (type->isPrimitiveType()) { + return VELOX_DYNAMIC_SCALAR_TEMPLATE_TYPE_DISPATCH( + getRandomInputGeneratorPrimitive, false, type->kind(), seed, type); + } else if (type->isArray()) { + generator = std::make_unique>(seed, type); + } else if (type->isMap()) { + generator = std::make_unique>(seed, type); + + } else if (type->isRow()) { + generator = std::make_unique>( + seed, type, std::vector>{}); + } + return generator; +} + +} // namespace facebook::velox::fuzzer diff --git a/velox/experimental/fuzzer_input_generator/ConstrainedGenerators.h b/velox/experimental/fuzzer_input_generator/ConstrainedGenerators.h new file mode 100644 index 0000000000000..d4a018e9c1fe5 --- /dev/null +++ b/velox/experimental/fuzzer_input_generator/ConstrainedGenerators.h @@ -0,0 +1,330 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include "folly/json.h" + +#include "velox/type/Type.h" +#include "velox/type/Variant.h" +#include "velox/vector/fuzzer/Utils.h" + +namespace facebook::velox::fuzzer { + +using facebook::velox::variant; + +class AbstractInputGenerator { + public: + AbstractInputGenerator( + size_t seed, + const TypePtr& type, + std::unique_ptr&& next); + + virtual ~AbstractInputGenerator() = default; + + virtual variant generate() = 0; + + TypePtr type() const { + return type_; + } + + protected: + FuzzerGenerator rng_; + + TypePtr type_; + + std::unique_ptr next_; +}; + +/// Generates a random string (string size and encoding are passed through +/// Options). +std::string randString( + FuzzerGenerator& rng, + size_t length, + UTF8CharList encoding, + std::wstring_convert, char16_t>& converter); + +std::unique_ptr getRandomInputGenerator( + size_t seed, + const TypePtr& type); + +template +class RandomInputGenerator : public AbstractInputGenerator { + public: + RandomInputGenerator(size_t seed, const TypePtr& type) + : AbstractInputGenerator(seed, type, nullptr) {} + + ~RandomInputGenerator() override = default; + + variant generate() override { + if (type_->isDate()) { + return variant(randDate(rng_)); + } + return variant(rand(rng_)); + } +}; + +template +class RandomInputGenerator>> + : public AbstractInputGenerator { + public: + RandomInputGenerator>>( + size_t seed, + const TypePtr& type, + size_t maxLength = 20) + : AbstractInputGenerator(seed, type, nullptr), maxLength_{maxLength} {} + + ~RandomInputGenerator>>() + override = default; + + variant generate() override { + const auto length = rand(rng_, 0, maxLength_); + std::wstring_convert, char16_t> converter; + return variant(randString(rng_, length, encoding_, converter)); + } + + variant generate(size_t length, UTF8CharList encoding) { + std::wstring_convert, char16_t> converter; + return variant(randString(rng_, length, encoding, converter)); + } + + private: + const size_t maxLength_; + + UTF8CharList encoding_ = UTF8CharList::ASCII; +}; + +template +class RandomInputGenerator>> + : public AbstractInputGenerator { + public: + RandomInputGenerator>>( + size_t seed, + const TypePtr& type, + size_t maxLength = 10, + std::unique_ptr&& elementGenerator = nullptr, + std::optional containAtIndex = std::nullopt, + std::unique_ptr&& containGenerator = nullptr) + : AbstractInputGenerator(seed, type, nullptr), + maxLength_{maxLength}, + elementGenerator_{ + elementGenerator ? std::move(elementGenerator) + : getRandomInputGenerator(seed, type->childAt(0))}, + containAtIndex_{containAtIndex}, + containGenerator_{std::move(containGenerator)} {} + + ~RandomInputGenerator>>() + override = default; + + variant generate() override { + const auto length = rand(rng_, 0, maxLength_); + std::vector elements; + elements.reserve(length); + for (size_t i = 0; i < length; ++i) { + if UNLIKELY (containAtIndex_.has_value() && *containAtIndex_ == i) { + elements.push_back(containGenerator_->generate()); + } else { + elements.push_back(elementGenerator_->generate()); + } + } + return variant::array(elements); + } + + private: + const size_t maxLength_; + + std::unique_ptr elementGenerator_; + + std::optional containAtIndex_; + + std::unique_ptr containGenerator_; +}; + +template +class RandomInputGenerator>> + : public AbstractInputGenerator { + public: + RandomInputGenerator>>( + size_t seed, + const TypePtr& type, + size_t maxLength = 10, + std::unique_ptr&& keyGenerator = nullptr, + std::unique_ptr&& valueGenerator = nullptr, + std::optional containAtIndex = std::nullopt, + std::unique_ptr&& containKeyGenerator = nullptr, + std::unique_ptr&& containValueGenerator = nullptr) + : AbstractInputGenerator(seed, type, nullptr), + maxLength_{maxLength}, + keyGenerator_{ + keyGenerator ? std::move(keyGenerator) + : getRandomInputGenerator(seed, type->childAt(0))}, + valueGenerator_{ + valueGenerator ? std::move(valueGenerator) + : getRandomInputGenerator(seed, type->childAt(1))}, + containAtIndex_{containAtIndex}, + containKeyGenerator_{std::move(containKeyGenerator)}, + containValueGenerator_{std::move(containValueGenerator)} {} + + ~RandomInputGenerator>>() + override = default; + + variant generate() override { + const auto length = rand(rng_, 0, maxLength_); + std::map map; + for (size_t i = 0; i < length; ++i) { + if UNLIKELY (containAtIndex_.has_value() && *containAtIndex_ == i) { + map.emplace( + containKeyGenerator_->generate(), + containValueGenerator_->generate()); + } else { + map.emplace(keyGenerator_->generate(), valueGenerator_->generate()); + } + } + return variant::map(map); + } + + private: + const size_t maxLength_; + + std::unique_ptr keyGenerator_; + + std::unique_ptr valueGenerator_; + + std::optional containAtIndex_; + + std::unique_ptr containKeyGenerator_; + + std::unique_ptr containValueGenerator_; +}; + +template +class RandomInputGenerator>> + : public AbstractInputGenerator { + public: + RandomInputGenerator>>( + size_t seed, + const TypePtr& type, + std::vector> fieldGenerators) + : AbstractInputGenerator(seed, type, nullptr) { + const auto length = type->size(); + fieldGenerators_ = std::move(fieldGenerators); + for (size_t i = 0; i < length; ++i) { + if (fieldGenerators_.size() <= i) { + fieldGenerators_.push_back( + getRandomInputGenerator(seed, type->childAt(i))); + } else if (fieldGenerators_[i] == nullptr) { + fieldGenerators_[i] = getRandomInputGenerator(seed, type->childAt(i)); + } + } + } + + ~RandomInputGenerator>>() + override = default; + + variant generate() override { + const auto length = type_->size(); + std::vector fields; + fields.reserve(length); + for (size_t i = 0; i < length; ++i) { + fields.push_back(fieldGenerators_[i]->generate()); + } + return variant::row(fields); + } + + private: + std::vector> fieldGenerators_; +}; + +class NotEqualConstrainedGenerator : public AbstractInputGenerator { + public: + NotEqualConstrainedGenerator( + size_t seed, + const TypePtr& type, + const variant& excludedValue, + std::unique_ptr&& next) + : AbstractInputGenerator(seed, type, std::move(next)), + excludedValue_{excludedValue} {} + + ~NotEqualConstrainedGenerator() override = default; + + variant generate() override; + + private: + variant excludedValue_; +}; + +class SetConstrainedGenerator : public AbstractInputGenerator { + public: + SetConstrainedGenerator( + size_t seed, + const TypePtr& type, + const std::vector& set) + : AbstractInputGenerator(seed, type, nullptr), set_{set} {} + + ~SetConstrainedGenerator() override = default; + + variant generate() override; + + private: + std::vector set_; +}; + +class JsonInputGenerator : public AbstractInputGenerator { + public: + JsonInputGenerator( + size_t seed, + const TypePtr& type, + std::unique_ptr&& objectGenerator, + bool makeRandomVariation = false) + : AbstractInputGenerator(seed, type, nullptr), + objectGenerator_{std::move(objectGenerator)}, + makeRandomVariation_{makeRandomVariation}, + opts_{getSerializationOptions()} {} + + ~JsonInputGenerator() override = default; + + variant generate() override; + + const folly::json::serialization_opts& serializationOptions() const { + return opts_; + } + + private: + template + folly::dynamic convertVariantToDynamicPrimitive(const variant& v) { + using T = typename TypeTraits::DeepCopiedType; + VELOX_CHECK(v.isSet()); + const T value = v.value(); + return folly::dynamic(value); + } + + folly::dynamic convertVariantToDynamic(const variant& object); + + void makeRandomVariation(std::string json); + + folly::json::serialization_opts getSerializationOptions(); + + std::unique_ptr objectGenerator_; + + bool makeRandomVariation_; + + folly::json::serialization_opts opts_; +}; + +} // namespace facebook::velox::fuzzer diff --git a/velox/experimental/fuzzer_input_generator/ConstrainedVectorGenerator.cpp b/velox/experimental/fuzzer_input_generator/ConstrainedVectorGenerator.cpp new file mode 100644 index 0000000000000..20797769ea766 --- /dev/null +++ b/velox/experimental/fuzzer_input_generator/ConstrainedVectorGenerator.cpp @@ -0,0 +1,149 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/experimental/fuzzer_input_generator/ConstrainedVectorGenerator.h" + +#include "velox/expression/VectorWriters.h" + +namespace facebook::velox::fuzzer { + +using exec::GenericWriter; +using exec::VectorWriter; + +// static +VectorPtr ConstrainedVectorGenerator::generateConstant( + const std::shared_ptr& customGenerator, + vector_size_t size, + memory::MemoryPool* pool) { + VELOX_CHECK_NOT_NULL(customGenerator); + VELOX_CHECK(customGenerator->type()->isPrimitiveType()); + + const auto& type = customGenerator->type(); + const auto variant = customGenerator->generate(); + + return BaseVector::createConstant(type, variant, size, pool); +} + +template +void writeOne(const variant& value, GenericWriter& writer); +template <> +void writeOne(const variant& value, GenericWriter& writer); +template <> +void writeOne(const variant& value, GenericWriter& writer); +template <> +void writeOne(const variant& value, GenericWriter& writer); +template <> +void writeOne(const variant& value, GenericWriter& writer); +template <> +void writeOne(const variant& value, GenericWriter& writer); + +template +void writeOne(const variant& value, GenericWriter& writer) { + using T = typename TypeTraits::NativeType; + writer.template castTo() = value.value(); +} + +template <> +void writeOne(const variant& value, GenericWriter& writer) { + writer.template castTo() = value.value(); +} + +template <> +void writeOne( + const variant& value, + GenericWriter& writer) { + writer.template castTo() = value.value(); +} + +template <> +void writeOne(const variant& value, GenericWriter& writer) { + auto& writerTyped = writer.template castTo>(); + const auto& elements = value.array(); + for (const auto& element : elements) { + if (element.isNull()) { + writerTyped.add_null(); + } else { + VELOX_DYNAMIC_TYPE_DISPATCH( + writeOne, element.kind(), element, writerTyped.add_item()); + } + } +} + +template <> +void writeOne(const variant& value, GenericWriter& writer) { + auto& writerTyped = writer.template castTo>(); + const auto& map = value.map(); + for (const auto& pair : map) { + const auto& key = pair.first; + const auto& value = pair.second; + VELOX_CHECK(!key.isNull()); + if (value.isNull()) { + VELOX_DYNAMIC_TYPE_DISPATCH( + writeOne, key.kind(), key, writerTyped.add_null()); + } else { + auto writers = writerTyped.add_item(); + VELOX_DYNAMIC_TYPE_DISPATCH( + writeOne, key.kind(), key, std::get<0>(writers)); + VELOX_DYNAMIC_TYPE_DISPATCH( + writeOne, value.kind(), value, std::get<1>(writers)); + } + } +} + +template <> +void writeOne(const variant& value, GenericWriter& writer) { + auto& writerTyped = writer.template castTo(); + const auto& elements = value.row(); + column_index_t i = 0; + for (const auto& element : elements) { + if (element.isNull()) { + writerTyped.set_null_at(i); + } else { + VELOX_DYNAMIC_TYPE_DISPATCH( + writeOne, element.kind(), element, writerTyped.get_writer_at(i)); + } + i++; + } +} + +// static +VectorPtr ConstrainedVectorGenerator::generateFlat( + const std::shared_ptr& customGenerator, + vector_size_t size, + memory::MemoryPool* pool) { + VELOX_CHECK_NOT_NULL(customGenerator); + + VectorPtr result; + const auto& type = customGenerator->type(); + BaseVector::ensureWritable(SelectivityVector(size), type, pool, result); + VectorWriter writer; + writer.init(*result); + + for (auto i = 0; i < size; ++i) { + writer.setOffset(i); + const auto variant = customGenerator->generate(); + if (variant.isNull()) { + writer.commitNull(); + } else { + VELOX_DYNAMIC_TYPE_DISPATCH( + writeOne, type->kind(), variant, writer.current()); + writer.commit(true); + } + } + return result; +} + +} // namespace facebook::velox::fuzzer diff --git a/velox/experimental/fuzzer_input_generator/ConstrainedVectorGenerator.h b/velox/experimental/fuzzer_input_generator/ConstrainedVectorGenerator.h new file mode 100644 index 0000000000000..555b905f46b0a --- /dev/null +++ b/velox/experimental/fuzzer_input_generator/ConstrainedVectorGenerator.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "velox/experimental/fuzzer_input_generator/ConstrainedGenerators.h" + +namespace facebook::velox::fuzzer { + +class ConstrainedVectorGenerator { + public: + ConstrainedVectorGenerator() = delete; + + static VectorPtr generateConstant( + const std::shared_ptr& customGenerator, + vector_size_t size, + memory::MemoryPool* pool); + + static VectorPtr generateFlat( + const std::shared_ptr& customGenerator, + vector_size_t size, + memory::MemoryPool* pool); +}; + +} // namespace facebook::velox::fuzzer diff --git a/velox/experimental/fuzzer_input_generator/tests/CMakeLists.txt b/velox/experimental/fuzzer_input_generator/tests/CMakeLists.txt new file mode 100644 index 0000000000000..3f781a64aaf3b --- /dev/null +++ b/velox/experimental/fuzzer_input_generator/tests/CMakeLists.txt @@ -0,0 +1,28 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_executable(velox_fuzzer_constrained_input_generators_test + ConstrainedGeneratorsTest.cpp) + +add_test( + NAME velox_fuzzer_constrained_input_generators_test + COMMAND velox_fuzzer_constrained_input_generators_test + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + +target_link_libraries( + velox_fuzzer_constrained_input_generators_test + velox_fuzzer_constrained_input_generators + velox_presto_types + velox_type + velox_vector_test_lib) diff --git a/velox/experimental/fuzzer_input_generator/tests/ConstrainedGeneratorsTest.cpp b/velox/experimental/fuzzer_input_generator/tests/ConstrainedGeneratorsTest.cpp new file mode 100644 index 0000000000000..1016da37179d9 --- /dev/null +++ b/velox/experimental/fuzzer_input_generator/tests/ConstrainedGeneratorsTest.cpp @@ -0,0 +1,260 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/experimental/fuzzer_input_generator/ConstrainedGenerators.h" + +#include + +#include "velox/experimental/fuzzer_input_generator/ConstrainedVectorGenerator.h" +#include "velox/functions/prestosql/types/JsonType.h" +#include "velox/type/Variant.h" +#include "velox/vector/tests/utils/VectorTestBase.h" + +namespace facebook::velox::fuzzer::test { + +class ConstrainedGeneratorsTest : public testing::Test, + public velox::test::VectorTestBase { + protected: + static void SetUpTestCase() { + memory::MemoryManager::testingSetInstance({}); + } + + template + void testRandomPrimitive(const TypePtr& type) { + VELOX_CHECK_EQ(type->kind(), KIND); + using T = typename TypeTraits::NativeType; + + std::unique_ptr generator = + std::make_unique>(0, type); + auto value = generator->generate(); + EXPECT_TRUE(value.hasValue()); + EXPECT_EQ(value.kind(), KIND); + } + + template + void testRandomComplex(const TypePtr& type) { + VELOX_CHECK_EQ(type->kind(), KIND); + using T = typename TypeTraits::ImplType; + + std::unique_ptr generator = + std::make_unique>(0, type); + auto value = generator->generate(); + EXPECT_TRUE(value.hasValue()); + EXPECT_EQ(value.kind(), KIND); // TODO: check type recursive + } + + template + void testNotEqualPrimitive(const TypePtr& type, const TValue& excludedValue) { + VELOX_CHECK_EQ(type->kind(), KIND); + using T = typename TypeTraits::NativeType; + + variant excludedVariant{excludedValue}; + std::unique_ptr generator = + std::make_unique( + 0, + type, + excludedVariant, + std::make_unique>(0, type)); + + const uint32_t kIterations = 1000; + for (uint32_t i = 0; i < kIterations; ++i) { + auto value = generator->generate(); + EXPECT_TRUE(value.hasValue()); + EXPECT_EQ(value.kind(), KIND); + EXPECT_NE(value, excludedVariant); + } + } + + template + void testNotEqualComplex( + const TypePtr& type, + const variant& excludedVariant) { + VELOX_CHECK_EQ(type->kind(), KIND); + using T = typename TypeTraits::ImplType; + + std::unique_ptr generator = + std::make_unique( + 0, + type, + excludedVariant, + std::make_unique>(0, type)); + + const uint32_t kIterations = 1000; + for (uint32_t i = 0; i < kIterations; ++i) { + auto value = generator->generate(); + EXPECT_TRUE(value.hasValue()); + EXPECT_EQ(value.kind(), KIND); // todo: check type recursive + EXPECT_NE(value, excludedVariant); + } + } + + template + void testSetPrimitive(const TypePtr& type, const TSet& setOfRawValues) { + VELOX_CHECK_EQ(type->kind(), KIND); + using T = typename TypeTraits::NativeType; + + const uint32_t kIterations = 1000; + std::vector variants; + for (const auto& value : setOfRawValues) { + variants.push_back(variant{value}); + } + std::unique_ptr generator = + std::make_unique(0, type, variants); + + for (uint32_t i = 0; i < kIterations; ++i) { + auto value = generator->generate(); + EXPECT_TRUE(value.hasValue()); + EXPECT_EQ(value.kind(), KIND); + EXPECT_NE(setOfRawValues.count(value.value()), 0); + } + } + + template + void testSetComplex( + const TypePtr& type, + const std::vector& variants) { + VELOX_CHECK_EQ(type->kind(), KIND); + using T = typename TypeTraits::ImplType; + + std::set setOfVariants{variants.begin(), variants.end()}; + + std::unique_ptr generator = + std::make_unique(0, type, variants); + + const uint32_t kIterations = 1000; + for (uint32_t i = 0; i < kIterations; ++i) { + auto value = generator->generate(); + EXPECT_TRUE(value.hasValue()); + EXPECT_EQ(value.kind(), KIND); // todo: check type recursive + EXPECT_NE(setOfVariants.count(value), 0); + } + } + + template + void testGenerateVectorsPrimitive( + const TypePtr& type, + const variant& excludedValue) { + using T = typename TypeTraits::NativeType; + const uint32_t kSize = 1000; + std::shared_ptr generator = + std::make_shared( + 0, + type, + excludedValue, + std::make_unique>(0, type)); + auto vector = + ConstrainedVectorGenerator::generateConstant(generator, kSize, pool()); + EXPECT_EQ(vector->size(), kSize); + EXPECT_EQ(vector->typeKind(), KIND); + EXPECT_TRUE(vector->isConstantEncoding()); + EXPECT_NE(vector->as>()->valueAt(0), excludedValue); + + vector = ConstrainedVectorGenerator::generateFlat(generator, kSize, pool()); + EXPECT_EQ(vector->size(), kSize); + EXPECT_EQ(vector->typeKind(), KIND); + EXPECT_TRUE(vector->isFlatEncoding()); + for (auto i = 0; i < kSize; ++i) { + EXPECT_NE(vector->as>()->valueAt(i), excludedValue); + } + } + + template + void testGenerateVectorsComplex(const TypePtr& type) { + using T = typename TypeTraits::ImplType; + const uint32_t kSize = 1000; + std::shared_ptr generator = + std::make_shared>(0, type); + auto vector = + ConstrainedVectorGenerator::generateFlat(generator, kSize, pool()); + EXPECT_EQ(vector->size(), kSize); + EXPECT_EQ(vector->type(), type); + } +}; + +TEST_F(ConstrainedGeneratorsTest, randomPrimitive) { + testRandomPrimitive(INTEGER()); + + testRandomPrimitive(VARCHAR()); +} + +TEST_F(ConstrainedGeneratorsTest, randomComplex) { + testRandomComplex(ARRAY(MAP(VARCHAR(), ROW({BIGINT()})))); +} + +TEST_F(ConstrainedGeneratorsTest, notEqPrimitive) { + testNotEqualPrimitive(TINYINT(), static_cast(1)); + + testNotEqualPrimitive(VARCHAR(), ""_sv); +} + +TEST_F(ConstrainedGeneratorsTest, notEqComplex) { + auto excludedVariant = variant::array({variant::map( + {{variant{"1"}, variant::row({variant{static_cast(1)}})}})}); + testNotEqualComplex( + ARRAY(MAP(VARCHAR(), ROW({BIGINT()}))), excludedVariant); +} + +TEST_F(ConstrainedGeneratorsTest, setPrimitive) { + std::unordered_set integers{{1, 2, 3}}; + testSetPrimitive(INTEGER(), integers); + + std::unordered_set strings{{"1", "2", "3"}}; + testSetPrimitive(VARCHAR(), strings); +} + +TEST_F(ConstrainedGeneratorsTest, setComplex) { + std::vector variants{ + variant::array({variant::map( + {{variant{"1"}, variant::row({variant{static_cast(1)}})}})}), + variant::array({variant::map( + {{variant{"2"}, + variant::row({variant{static_cast(2)}})}})})}; + testSetComplex( + ARRAY(MAP(VARCHAR(), ROW({BIGINT()}))), variants); +} + +TEST_F(ConstrainedGeneratorsTest, json) { + const TypePtr type = ARRAY(MAP(DOUBLE(), ROW({BIGINT()}))); + std::unique_ptr generator = + std::make_unique( + 0, + JSON(), + std::make_unique>(0, type)); + + const uint32_t kIterations = 1000; + const auto& opts = generator->serializationOptions(); + for (uint32_t i = 0; i < kIterations; ++i) { + auto value = generator->generate(); + EXPECT_TRUE(value.hasValue()); + EXPECT_EQ(value.kind(), TypeKind::VARCHAR); + folly::dynamic json; + EXPECT_NO_THROW( + json = folly::parseJson(value.value(), opts)); + EXPECT_TRUE(json.isArray()); + } +} + +TEST_F(ConstrainedGeneratorsTest, generateVectors) { + testGenerateVectorsPrimitive(BIGINT(), variant(0)); + testGenerateVectorsPrimitive(VARCHAR(), variant("")); + + testGenerateVectorsComplex( + ARRAY(ROW({MAP(VARCHAR(), BIGINT())}))); + testGenerateVectorsComplex( + MAP(ARRAY(BIGINT()), ROW({VARCHAR()}))); +} + +} // namespace facebook::velox::fuzzer::test diff --git a/velox/vector/fuzzer/Utils.cpp b/velox/vector/fuzzer/Utils.cpp index 805fcf1063d73..f8bab45af6940 100644 --- a/velox/vector/fuzzer/Utils.cpp +++ b/velox/vector/fuzzer/Utils.cpp @@ -16,13 +16,53 @@ #include "velox/vector/fuzzer/Utils.h" -namespace facebook::velox::generator_spec_utils { +namespace facebook::velox { bool coinToss(FuzzerGenerator& rng, double threshold) { static std::uniform_real_distribution<> dist(0.0, 1.0); return dist(rng) < threshold; } +Timestamp randTimestamp( + FuzzerGenerator& rng, + FuzzerTimestampPrecision timestampPrecision) { + // Generate timestamps only in the valid range to avoid datetime functions, + // such as try_cast(varchar as timestamp), throwing VeloxRuntimeError in + // fuzzers. + constexpr int64_t min = -2'140'671'600; + constexpr int64_t max = 2'140'671'600; + constexpr int64_t microInSecond = 1'000'000; + constexpr int64_t millisInSecond = 1'000; + // DWRF requires nano to be in a certain range. Hardcode the value here to + // avoid the dependency on DWRF. + constexpr int64_t MAX_NANOS = 1'000'000'000; + + switch (timestampPrecision) { + case FuzzerTimestampPrecision::kNanoSeconds: + return Timestamp( + rand(rng, min, max), (rand(rng) % MAX_NANOS)); + case FuzzerTimestampPrecision::kMicroSeconds: + return Timestamp::fromMicros( + rand(rng, min, max) * microInSecond + + rand(rng, -microInSecond, microInSecond)); + case FuzzerTimestampPrecision::kMilliSeconds: + return Timestamp::fromMillis( + rand(rng, min, max) * millisInSecond + + rand(rng, -millisInSecond, millisInSecond)); + case FuzzerTimestampPrecision::kSeconds: + return Timestamp(rand(rng, min, max), 0); + } + return {}; // no-op. +} + +int32_t randDate(FuzzerGenerator& rng) { + constexpr int64_t min = -24'450; + constexpr int64_t max = 24'450; + return rand(rng, min, max); +} + +namespace generator_spec_utils { + vector_size_t getRandomIndex(FuzzerGenerator& rng, vector_size_t maxIndex) { std::uniform_int_distribution indexGenerator( 0, maxIndex); // generates index in [0, maxIndex] @@ -59,4 +99,6 @@ BufferPtr generateIndicesBuffer( return indices; } -} // namespace facebook::velox::generator_spec_utils +} // namespace generator_spec_utils + +} // namespace facebook::velox diff --git a/velox/vector/fuzzer/Utils.h b/velox/vector/fuzzer/Utils.h index 0248b08f942fa..fbbed12a0b102 100644 --- a/velox/vector/fuzzer/Utils.h +++ b/velox/vector/fuzzer/Utils.h @@ -16,6 +16,11 @@ #pragma once +#include + +#include +#include + #include "velox/vector/BaseVector.h" #include "velox/vector/NullsBuilder.h" @@ -23,6 +28,57 @@ namespace facebook::velox { using FuzzerGenerator = std::mt19937; +enum UTF8CharList { + ASCII = 0, // Ascii character set. + UNICODE_CASE_SENSITIVE = 1, // Unicode scripts that support case. + EXTENDED_UNICODE = 2, // Extended Unicode: Arabic, Devanagiri etc + MATHEMATICAL_SYMBOLS = 3 // Mathematical Symbols. +}; + +/// Unicode character ranges. Ensure the vector indexes match the UTF8CharList +/// enum values. +/// +/// Source: https://jrgraphix.net/research/unicode_blocks.php +static const std::vector>> + kUTFChatSets{ + // UTF8CharList::ASCII + { + {33, 127}, // All ASCII printable chars. + }, + // UTF8CharList::UNICODE_CASE_SENSITIVE + { + {u'\u0020', u'\u007F'}, // Basic Latin. + {u'\u0400', u'\u04FF'}, // Cyrillic. + }, + // UTF8CharList::EXTENDED_UNICODE + { + {u'\u03F0', u'\u03FF'}, // Greek. + {u'\u0100', u'\u017F'}, // Latin Extended A. + {u'\u0600', u'\u06FF'}, // Arabic. + {u'\u0900', u'\u097F'}, // Devanagari. + {u'\u0600', u'\u06FF'}, // Hebrew. + {u'\u3040', u'\u309F'}, // Hiragana. + {u'\u2000', u'\u206F'}, // Punctuation. + {u'\u2070', u'\u209F'}, // Sub/Super Script. + {u'\u20A0', u'\u20CF'}, // Currency. + }, + // UTF8CharList::MATHEMATICAL_SYMBOLS + { + {u'\u2200', u'\u22FF'}, // Math Operators. + {u'\u2150', u'\u218F'}, // Number Forms. + {u'\u25A0', u'\u25FF'}, // Geometric Shapes. + {u'\u27C0', u'\u27EF'}, // Math Symbols. + {u'\u2A00', u'\u2AFF'}, // Supplemental. + }, + }; + +bool coinToss(FuzzerGenerator& rng, double threshold); + +struct DataSpec { + bool includeNaN; + bool includeInfinity; +}; + enum class FuzzerTimestampPrecision : int8_t { kNanoSeconds = 0, kMicroSeconds = 1, @@ -30,9 +86,96 @@ enum class FuzzerTimestampPrecision : int8_t { kSeconds = 3, }; -namespace generator_spec_utils { +// Generate random values for the different supported types. +template +inline T rand(FuzzerGenerator& rng, DataSpec dataSpec = {false, false}) { + VELOX_NYI(); +} -bool coinToss(FuzzerGenerator& rng, double threshold); +template <> +inline int8_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution()(rng); +} + +template <> +inline int16_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution()(rng); +} + +template <> +inline int32_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution()(rng); +} + +template <> +inline int64_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution()(rng); +} + +template <> +inline double rand(FuzzerGenerator& rng, DataSpec dataSpec) { + if (dataSpec.includeNaN && coinToss(rng, 0.05)) { + return std::nan(""); + } + + if (dataSpec.includeInfinity && coinToss(rng, 0.05)) { + return std::numeric_limits::infinity(); + } + + return boost::random::uniform_01()(rng); +} + +template <> +inline float rand(FuzzerGenerator& rng, DataSpec dataSpec) { + if (dataSpec.includeNaN && coinToss(rng, 0.05)) { + return std::nanf(""); + } + + if (dataSpec.includeInfinity && coinToss(rng, 0.05)) { + return std::numeric_limits::infinity(); + } + + return boost::random::uniform_01()(rng); +} + +template <> +inline bool rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution(0, 1)(rng); +} + +template <> +inline uint32_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution()(rng); +} + +template <> +inline uint64_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return boost::random::uniform_int_distribution()(rng); +} + +template <> +inline int128_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + return HugeInt::build(rand(rng), rand(rng)); +} + +Timestamp randTimestamp( + FuzzerGenerator& rng, + FuzzerTimestampPrecision timestampPrecision); + +template <> +inline Timestamp rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { + // TODO: support other timestamp precisions. + return randTimestamp(rng, FuzzerTimestampPrecision::kMicroSeconds); +} + +int32_t randDate(FuzzerGenerator& rng); + +template , int> = 0> +inline T rand(FuzzerGenerator& rng, T min, T max) { + return boost::random::uniform_int_distribution(min, max)(rng); +} + +namespace generator_spec_utils { vector_size_t getRandomIndex(FuzzerGenerator& rng, vector_size_t maxIndex); diff --git a/velox/vector/fuzzer/VectorFuzzer.cpp b/velox/vector/fuzzer/VectorFuzzer.cpp index 9797bc56632a7..cc8f793d28e04 100644 --- a/velox/vector/fuzzer/VectorFuzzer.cpp +++ b/velox/vector/fuzzer/VectorFuzzer.cpp @@ -27,15 +27,12 @@ #include "velox/vector/FlatVector.h" #include "velox/vector/NullsBuilder.h" #include "velox/vector/VectorTypeUtils.h" +#include "velox/vector/fuzzer/Utils.h" namespace facebook::velox { namespace { -// DWRF requires nano to be in a certain range. Hardcode the value here to avoid -// the dependency on DWRF. -constexpr int64_t MAX_NANOS = 1'000'000'000; - // Structure to help temporary changes to Options. This objects saves the // current state of the Options object, and restores it when it's destructed. // For instance, if you would like to temporarily disable nulls for a particular @@ -62,116 +59,6 @@ struct ScopedOptions { VectorFuzzer::Options savedOpts; }; -// Generate random values for the different supported types. -template -T rand(FuzzerGenerator& rng, DataSpec dataSpec = {false, false}) { - VELOX_NYI(); -} - -template <> -int8_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution()(rng); -} - -template <> -int16_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution()(rng); -} - -template <> -int32_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution()(rng); -} - -template <> -int64_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution()(rng); -} - -template <> -double rand(FuzzerGenerator& rng, DataSpec dataSpec) { - if (dataSpec.includeNaN && coinToss(rng, 0.05)) { - return std::nan(""); - } - - if (dataSpec.includeInfinity && coinToss(rng, 0.05)) { - return std::numeric_limits::infinity(); - } - - return boost::random::uniform_01()(rng); -} - -template <> -float rand(FuzzerGenerator& rng, DataSpec dataSpec) { - if (dataSpec.includeNaN && coinToss(rng, 0.05)) { - return std::nanf(""); - } - - if (dataSpec.includeInfinity && coinToss(rng, 0.05)) { - return std::numeric_limits::infinity(); - } - - return boost::random::uniform_01()(rng); -} - -template <> -bool rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution(0, 1)(rng); -} - -template <> -uint32_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution()(rng); -} - -template <> -uint64_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return boost::random::uniform_int_distribution()(rng); -} - -template <> -int128_t rand(FuzzerGenerator& rng, DataSpec /*dataSpec*/) { - return HugeInt::build(rand(rng), rand(rng)); -} - -template , int> = 0> -T rand(FuzzerGenerator& rng, T min, T max) { - return boost::random::uniform_int_distribution(min, max)(rng); -} - -Timestamp randTimestamp(FuzzerGenerator& rng, VectorFuzzer::Options opts) { - // Generate timestamps only in the valid range to avoid datetime functions, - // such as try_cast(varchar as timestamp), throwing VeloxRuntimeError in - // fuzzers. - constexpr int64_t min = -2'140'671'600; - constexpr int64_t max = 2'140'671'600; - constexpr int64_t microInSecond = 1'000'000; - constexpr int64_t millisInSecond = 1'000; - - switch (opts.timestampPrecision) { - case FuzzerTimestampPrecision::kNanoSeconds: - return Timestamp( - rand(rng, min, max), (rand(rng) % MAX_NANOS)); - case FuzzerTimestampPrecision::kMicroSeconds: - return Timestamp::fromMicros( - rand(rng, min, max) * microInSecond + - rand(rng, -microInSecond, microInSecond)); - case FuzzerTimestampPrecision::kMilliSeconds: - return Timestamp::fromMillis( - rand(rng, min, max) * millisInSecond + - rand(rng, -millisInSecond, millisInSecond)); - case FuzzerTimestampPrecision::kSeconds: - return Timestamp(rand(rng, min, max), 0); - } - return {}; // no-op. -} - -int32_t randDate(FuzzerGenerator& rng) { - constexpr int64_t min = -24'450; - constexpr int64_t max = 24'450; - return rand(rng, min, max); -} - size_t getElementsVectorLength( const VectorFuzzer::Options& opts, vector_size_t size) { @@ -196,42 +83,6 @@ int128_t randLongDecimal(const TypePtr& type, FuzzerGenerator& rng) { return rand(rng) % DecimalUtil::kPowersOfTen[precision]; } -/// Unicode character ranges. Ensure the vector indexes match the UTF8CharList -/// enum values. -/// -/// Source: https://jrgraphix.net/research/unicode_blocks.php -const std::vector>> kUTFChatSets{ - // UTF8CharList::ASCII - { - {33, 127}, // All ASCII printable chars. - }, - // UTF8CharList::UNICODE_CASE_SENSITIVE - { - {u'\u0020', u'\u007F'}, // Basic Latin. - {u'\u0400', u'\u04FF'}, // Cyrillic. - }, - // UTF8CharList::EXTENDED_UNICODE - { - {u'\u03F0', u'\u03FF'}, // Greek. - {u'\u0100', u'\u017F'}, // Latin Extended A. - {u'\u0600', u'\u06FF'}, // Arabic. - {u'\u0900', u'\u097F'}, // Devanagari. - {u'\u0600', u'\u06FF'}, // Hebrew. - {u'\u3040', u'\u309F'}, // Hiragana. - {u'\u2000', u'\u206F'}, // Punctuation. - {u'\u2070', u'\u209F'}, // Sub/Super Script. - {u'\u20A0', u'\u20CF'}, // Currency. - }, - // UTF8CharList::MATHEMATICAL_SYMBOLS - { - {u'\u2200', u'\u22FF'}, // Math Operators. - {u'\u2150', u'\u218F'}, // Number Forms. - {u'\u25A0', u'\u25FF'}, // Geometric Shapes. - {u'\u27C0', u'\u27EF'}, // Math Symbols. - {u'\u2A00', u'\u2AFF'}, // Supplemental. - }, -}; - FOLLY_ALWAYS_INLINE char16_t getRandomChar( FuzzerGenerator& rng, const std::vector>& charSet) { @@ -290,7 +141,7 @@ VectorPtr fuzzConstantPrimitiveImpl( } if constexpr (std::is_same_v) { return std::make_shared>( - pool, size, false, type, randTimestamp(rng, opts)); + pool, size, false, type, randTimestamp(rng, opts.timestampPrecision)); } else if (type->isDate()) { return std::make_shared>( pool, size, false, type, randDate(rng)); @@ -322,7 +173,7 @@ void fuzzFlatPrimitiveImpl( if constexpr (std::is_same_v) { flatVector->set(i, randString(rng, opts, strBuf, converter)); } else if constexpr (std::is_same_v) { - flatVector->set(i, randTimestamp(rng, opts)); + flatVector->set(i, randTimestamp(rng, opts.timestampPrecision)); } else if constexpr (std::is_same_v) { if (vector->type()->isShortDecimal()) { flatVector->set(i, randShortDecimal(vector->type(), rng)); diff --git a/velox/vector/fuzzer/VectorFuzzer.h b/velox/vector/fuzzer/VectorFuzzer.h index 9c5691aa144eb..75254d153c4ea 100644 --- a/velox/vector/fuzzer/VectorFuzzer.h +++ b/velox/vector/fuzzer/VectorFuzzer.h @@ -27,18 +27,6 @@ namespace facebook::velox { -enum UTF8CharList { - ASCII = 0, // Ascii character set. - UNICODE_CASE_SENSITIVE = 1, // Unicode scripts that support case. - EXTENDED_UNICODE = 2, // Extended Unicode: Arabic, Devanagiri etc - MATHEMATICAL_SYMBOLS = 3 // Mathematical Symbols. -}; - -struct DataSpec { - bool includeNaN; - bool includeInfinity; -}; - const std::vector& defaultScalarTypes(); /// VectorFuzzer is a helper class that generates randomized vectors and their