From 030e439f5616080d9c85cc954b7dbe09898d42c8 Mon Sep 17 00:00:00 2001 From: rui-mo Date: Sat, 7 Sep 2024 01:16:07 -0700 Subject: [PATCH] Extend expression fuzzer test to support decimal (#9149) Summary: ArgumentTypeFuzzer could be used to generate argument types when constructing a decimal expression in the fuzzer test. However, given a result type, it is unable to produce argument types that satisfy the necessary constraints. To address this limitation, argument type generators for Presto and Spark decimal functions have been added. In this PR, ExpressionFuzzer takes a map from function name to an instance of the argument generator. Custom generators provided by Presto and Spark are used in the expression fuzzer test to generate argument types. Experimental fuzzer tests with decimal type enabled are added. https://github.com/facebookincubator/velox/issues/1968 Pull Request resolved: https://github.com/facebookincubator/velox/pull/9149 Reviewed By: kgpai Differential Revision: D62193491 Pulled By: gggrace14 fbshipit-source-id: 6a0d935c4df4bfe4af605010b7df23a062fefc5a --- CONTRIBUTING.md | 4 +- velox/docs/develop/testing/fuzzer.rst | 2 + velox/expression/ReverseSignatureBinder.cpp | 20 -- velox/expression/ReverseSignatureBinder.h | 4 - .../expression/fuzzer/ArgumentTypeFuzzer.cpp | 174 ++++++++++++------ velox/expression/fuzzer/ArgumentTypeFuzzer.h | 16 ++ velox/expression/fuzzer/ExpressionFuzzer.cpp | 64 +++++-- velox/expression/fuzzer/ExpressionFuzzer.h | 12 +- .../fuzzer/ExpressionFuzzerTest.cpp | 22 ++- .../fuzzer/ExpressionFuzzerVerifier.cpp | 7 +- .../fuzzer/ExpressionFuzzerVerifier.h | 4 +- velox/expression/fuzzer/FuzzerRunner.cpp | 19 +- velox/expression/fuzzer/FuzzerRunner.h | 8 +- .../fuzzer/SparkExpressionFuzzerTest.cpp | 23 ++- .../fuzzer/tests/ArgumentTypeFuzzerTest.cpp | 59 ++++++ velox/functions/sparksql/MakeTimestamp.cpp | 6 +- .../sparksql/UnscaledValueFunction.cpp | 3 +- 17 files changed, 329 insertions(+), 118 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9a84ab2aeed2..6c8d4f75ca51 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -206,11 +206,11 @@ with a benchmark. ``` # Test the new function in isolation. Use --only flag to restrict the set of functions # and run for 60 seconds or longer. - velox_expression_fuzzer_test --only --duration_sec 60 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse + velox_expression_fuzzer_test --only --duration_sec 60 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --velox_fuzzer_enable_decimal_type --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse # Test the new function in combination with other functions. Do not restrict the set # of functions and run for 10 minutes (600 seconds) or longer. - velox_expression_fuzzer_test --duration_sec 600 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse + velox_expression_fuzzer_test --duration_sec 600 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --velox_fuzzer_enable_decimal_type --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse ``` Here are example PRs: diff --git a/velox/docs/develop/testing/fuzzer.rst b/velox/docs/develop/testing/fuzzer.rst index 639acda6ee5e..97dd226698a8 100644 --- a/velox/docs/develop/testing/fuzzer.rst +++ b/velox/docs/develop/testing/fuzzer.rst @@ -228,6 +228,8 @@ Below are arguments that toggle certain fuzzer features in Expression Fuzzer: * ``--velox_fuzzer_enable_complex_types``: Enable testing of function signatures with complex argument or return types. Default is false. +* ``--velox_fuzzer_enable_decimal_type``: Enable testing of function signatures with decimal argument or return type. Default is false. + * ``--lazy_vector_generation_ratio``: Specifies the probability with which columns in the input row vector will be selected to be wrapped in lazy encoding (expressed as double from 0 to 1). Default is 0.0. * ``--velox_fuzzer_enable_column_reuse``: Enable generation of expressions where one input column can be used by multiple subexpressions. Default is false. diff --git a/velox/expression/ReverseSignatureBinder.cpp b/velox/expression/ReverseSignatureBinder.cpp index 495920e6bf1f..274d2a06480f 100644 --- a/velox/expression/ReverseSignatureBinder.cpp +++ b/velox/expression/ReverseSignatureBinder.cpp @@ -18,27 +18,7 @@ namespace facebook::velox::exec { -bool ReverseSignatureBinder::hasConstrainedIntegerVariable( - const TypeSignature& type) const { - if (type.parameters().empty()) { - auto it = variables().find(type.baseName()); - return it != variables().end() && it->second.isIntegerParameter() && - it->second.constraint() != ""; - } - - const auto& parameters = type.parameters(); - for (const auto& parameter : parameters) { - if (hasConstrainedIntegerVariable(parameter)) { - return true; - } - } - return false; -} - bool ReverseSignatureBinder::tryBind() { - if (hasConstrainedIntegerVariable(signature_.returnType())) { - return false; - } tryBindSucceeded_ = SignatureBinderBase::tryBind(signature_.returnType(), returnType_); return tryBindSucceeded_; diff --git a/velox/expression/ReverseSignatureBinder.h b/velox/expression/ReverseSignatureBinder.h index 087adc4dbe77..3da800a5edbb 100644 --- a/velox/expression/ReverseSignatureBinder.h +++ b/velox/expression/ReverseSignatureBinder.h @@ -54,10 +54,6 @@ class ReverseSignatureBinder : private SignatureBinderBase { } private: - // Return whether there is a constraint on an integer variable in type - // signature. - bool hasConstrainedIntegerVariable(const TypeSignature& type) const; - const TypePtr returnType_; // True if 'tryBind' has been called and succeeded. False otherwise. diff --git a/velox/expression/fuzzer/ArgumentTypeFuzzer.cpp b/velox/expression/fuzzer/ArgumentTypeFuzzer.cpp index 1d7b784f2990..4e26e67a17a0 100644 --- a/velox/expression/fuzzer/ArgumentTypeFuzzer.cpp +++ b/velox/expression/fuzzer/ArgumentTypeFuzzer.cpp @@ -58,70 +58,43 @@ bool isPositiveInteger(const std::string& str) { void ArgumentTypeFuzzer::determineUnboundedIntegerVariables( const exec::TypeSignature& type) { - if (!isDecimalBaseName(type.baseName())) { + std::vector decimalTypes; + findDecimalTypes(type, decimalTypes); + if (decimalTypes.empty()) { return; } + for (const auto& decimalType : decimalTypes) { + auto [p, s] = tryBindFixedPrecisionScale(decimalType); - VELOX_CHECK_EQ(2, type.parameters().size()) - - const auto& precision = type.parameters()[0].baseName(); - const auto& scale = type.parameters()[1].baseName(); - - // Bind 'name' variable, if not already bound, using 'constant' constraint - // ('name'='123'). Return bound value if 'name' is already bound or was - // successfully bound to a constant value. Return std::nullopt otherwise. - auto tryFixedBinding = [&](const auto& name) -> std::optional { - auto it = variables().find(name); - if (it == variables().end()) { - VELOX_CHECK( - isPositiveInteger(name), - "Precision and scale of a decimal type must refer to a variable " - "or specify a position integer constant: {}", - name) - return std::stoi(name); - } - - if (integerBindings_.count(name) > 0) { - return integerBindings_[name]; + if (p.has_value() && s.has_value()) { + return; } - if (isPositiveInteger(it->second.constraint())) { - const auto value = std::stoi(it->second.constraint()); - integerBindings_[name] = value; - return value; - } - - return std::nullopt; - }; - - std::optional p = tryFixedBinding(precision); - std::optional s = tryFixedBinding(scale); + const auto& precision = decimalType.parameters()[0].baseName(); + const auto& scale = decimalType.parameters()[1].baseName(); - if (p.has_value() && s.has_value()) { - return; - } + if (s.has_value()) { + p = std::max(ShortDecimalType::kMinPrecision, s.value()); + if (p < LongDecimalType::kMaxPrecision) { + p = p.value() + rand32(0, LongDecimalType::kMaxPrecision - p.value()); + } - if (s.has_value()) { - p = std::max(ShortDecimalType::kMinPrecision, s.value()); - if (p < LongDecimalType::kMaxPrecision) { - p = p.value() + rand32(0, LongDecimalType::kMaxPrecision - p.value()); + integerBindings_[precision] = p.value(); + return; } - integerBindings_[precision] = p.value(); - return; - } + if (p.has_value()) { + s = rand32(0, p.value()); + integerBindings_[scale] = s.value(); + return; + } - if (p.has_value()) { + p = rand32(1, LongDecimalType::kMaxPrecision); s = rand32(0, p.value()); + + integerBindings_[precision] = p.value(); integerBindings_[scale] = s.value(); - return; } - - p = rand32(1, LongDecimalType::kMaxPrecision); - s = rand32(0, p.value()); - - integerBindings_[precision] = p.value(); - integerBindings_[scale] = s.value(); } void ArgumentTypeFuzzer::determineUnboundedTypeVariables() { @@ -154,24 +127,55 @@ TypePtr ArgumentTypeFuzzer::randOrderableType() { } bool ArgumentTypeFuzzer::fuzzArgumentTypes(uint32_t maxVariadicArgs) { - const auto& formalArgs = signature_.argumentTypes(); - auto formalArgsCnt = formalArgs.size(); - - if (returnType_) { + if (returnType_ == nullptr) { + for (const auto& [name, _] : signature_.variables()) { + bindings_.insert({name, nullptr}); + } + } else { exec::ReverseSignatureBinder binder{signature_, returnType_}; if (!binder.tryBind()) { return false; } + + auto types = signature_.argumentTypes(); + types.emplace_back(signature_.returnType()); + for (const auto& type : types) { + std::vector decimalTypes; + findDecimalTypes(type, decimalTypes); + if (decimalTypes.empty()) { + continue; + } + + // Verify if the precision and scale variables of argument types and + // return type can be bound to constant values. If not and extra + // constraint is provided, argument types cannot be generated without + // following these constraints. + for (const auto& decimalType : decimalTypes) { + const auto [p, s] = tryBindFixedPrecisionScale(decimalType); + + const auto& precision = decimalType.parameters()[0].baseName(); + const auto& scale = decimalType.parameters()[1].baseName(); + + const auto hasConstraint = [&](const std::string& name) { + return !variables().at(name).constraint().empty(); + }; + + if ((!p.has_value() && hasConstraint(precision)) || + (!s.has_value() && hasConstraint(scale))) { + return false; + } + } + } + bindings_ = binder.bindings(); integerBindings_ = binder.integerBindings(); - } else { - for (const auto& [name, _] : signature_.variables()) { - bindings_.insert({name, nullptr}); - } } + const auto& formalArgs = signature_.argumentTypes(); + auto formalArgsCnt = formalArgs.size(); + determineUnboundedTypeVariables(); - for (const auto& argType : signature_.argumentTypes()) { + for (const auto& argType : formalArgs) { determineUnboundedIntegerVariables(argType); } for (auto i = 0; i < formalArgsCnt; i++) { @@ -226,4 +230,54 @@ int32_t ArgumentTypeFuzzer::rand32(int32_t min, int32_t max) { return boost::random::uniform_int_distribution(min, max)(rng_); } +std::optional ArgumentTypeFuzzer::tryFixedBinding( + const std::string& name) { + auto it = variables().find(name); + if (it == variables().end()) { + VELOX_CHECK( + isPositiveInteger(name), + "Precision and scale of a decimal type must refer to a variable " + "or specify a positive integer constant: {}", + name) + return std::stoi(name); + } + + if (integerBindings_.count(name) > 0) { + return integerBindings_[name]; + } + + if (isPositiveInteger(it->second.constraint())) { + const auto value = std::stoi(it->second.constraint()); + integerBindings_[name] = value; + return value; + } + + return std::nullopt; +} + +std::pair, std::optional> +ArgumentTypeFuzzer::tryBindFixedPrecisionScale( + const exec::TypeSignature& type) { + VELOX_CHECK(isDecimalBaseName(type.baseName())); + VELOX_CHECK_EQ(2, type.parameters().size()) + + const auto& precisionName = type.parameters()[0].baseName(); + const auto& scaleName = type.parameters()[1].baseName(); + + std::optional p = tryFixedBinding(precisionName); + std::optional s = tryFixedBinding(scaleName); + return {p, s}; +} + +void ArgumentTypeFuzzer::findDecimalTypes( + const exec::TypeSignature& type, + std::vector& decimalTypes) const { + if (isDecimalBaseName(type.baseName())) { + decimalTypes.push_back(type); + } + for (const auto& param : type.parameters()) { + findDecimalTypes(param, decimalTypes); + } +} + } // namespace facebook::velox::fuzzer diff --git a/velox/expression/fuzzer/ArgumentTypeFuzzer.h b/velox/expression/fuzzer/ArgumentTypeFuzzer.h index 9a01ef9c5e4a..08c3375df9f8 100644 --- a/velox/expression/fuzzer/ArgumentTypeFuzzer.h +++ b/velox/expression/fuzzer/ArgumentTypeFuzzer.h @@ -82,6 +82,22 @@ class ArgumentTypeFuzzer { /// Generates an orderable random type, including structs, and arrays. TypePtr randOrderableType(); + // Bind 'name' variable, if not already bound, using 'constant' constraint + // ('name'='123'). Return bound value if 'name' is already bound or was + // successfully bound to a constant value. Return std::nullopt otherwise. + std::optional tryFixedBinding(const std::string& name); + + // Bind the precision and scale variables in a decimal type signature to + // constant values. Return std::nullopt if the variable cannot be bound to a + // constant value. + std::pair, std::optional> tryBindFixedPrecisionScale( + const exec::TypeSignature& type); + + // Find all the nested decimal type signatures recursively. + void findDecimalTypes( + const exec::TypeSignature& type, + std::vector& decimalTypes) const; + const exec::FunctionSignature& signature_; TypePtr returnType_; diff --git a/velox/expression/fuzzer/ExpressionFuzzer.cpp b/velox/expression/fuzzer/ExpressionFuzzer.cpp index f8b656cb20fc..8eae0d3b8e8f 100644 --- a/velox/expression/fuzzer/ExpressionFuzzer.cpp +++ b/velox/expression/fuzzer/ExpressionFuzzer.cpp @@ -437,16 +437,20 @@ bool useTypeName( bool isSupportedSignature( const exec::FunctionSignature& signature, - bool enableComplexType) { - // Not supporting lambda functions, or functions using decimal and - // timestamp with time zone types. + bool enableComplexType, + bool enableDecimalType) { + // When enableComplexType is disabled, not supporting complex functions. + const bool useComplexType = useTypeName(signature, "array") || + useTypeName(signature, "map") || useTypeName(signature, "row"); + // When enableDecimalType is disabled, not supporting decimal functions. Not + // supporting functions using custom types, timestamp with time zone types and + // interval day to second types. return !( useTypeName(signature, "opaque") || - useTypeName(signature, "long_decimal") || - useTypeName(signature, "short_decimal") || - useTypeName(signature, "decimal") || useTypeName(signature, "timestamp with time zone") || useTypeName(signature, "interval day to second") || + (!enableDecimalType && useTypeName(signature, "decimal")) || + (!enableComplexType && useComplexType) || (enableComplexType && useTypeName(signature, "unknown"))); } @@ -528,10 +532,13 @@ ExpressionFuzzer::ExpressionFuzzer( FunctionSignatureMap signatureMap, size_t initialSeed, const std::shared_ptr& vectorFuzzer, - const std::optional& options) + const std::optional& options, + const std::unordered_map>& + argGenerators) : options_(options.value_or(Options())), vectorFuzzer_(vectorFuzzer), - state{rng_, std::max(1, options_.maxLevelOfNesting)} { + state{rng_, std::max(1, options_.maxLevelOfNesting)}, + argGenerators_(argGenerators) { VELOX_CHECK(vectorFuzzer, "Vector fuzzer must be provided"); seed(initialSeed); @@ -555,10 +562,14 @@ ExpressionFuzzer::ExpressionFuzzer( for (const auto& signature : function.second) { ++totalFunctionSignatures; - if (!isSupportedSignature(*signature, options_.enableComplexTypes)) { + if (!isSupportedSignature( + *signature, + options_.enableComplexTypes, + options_.enableDecimalType)) { continue; } - if (!(signature->variables().empty() || options_.enableComplexTypes)) { + if (!(signature->variables().empty() || options_.enableComplexTypes || + options_.enableDecimalType)) { LOG(WARNING) << "Skipping unsupported signature: " << function.first << signature->toString(); continue; @@ -666,8 +677,10 @@ ExpressionFuzzer::ExpressionFuzzer( sortSignatureTemplates(signatureTemplates_); for (const auto& it : signatureTemplates_) { - auto& returnType = it.signature->returnType().baseName(); - auto* returnTypeKey = &returnType; + const auto returnType = + exec::sanitizeName(it.signature->returnType().baseName()); + const auto* returnTypeKey = &returnType; + if (it.typeVariables.find(returnType) != it.typeVariables.end()) { // Return type is a template variable. returnTypeKey = &kTypeParameterName; @@ -741,7 +754,7 @@ void ExpressionFuzzer::addToTypeToExpressionListByTicketTimes( const std::string& funcName) { int tickets = getTickets(funcName); for (int i = 0; i < tickets; i++) { - typeToExpressionList_[type].push_back(funcName); + typeToExpressionList_[exec::sanitizeName(type)].push_back(funcName); } } @@ -1015,7 +1028,8 @@ core::TypedExprPtr ExpressionFuzzer::generateExpression( } else { expression = generateExpressionFromConcreteSignatures( returnType, chosenFunctionName); - if (!expression && options_.enableComplexTypes) { + if (!expression && + (options_.enableComplexTypes || options_.enableDecimalType)) { expression = generateExpressionFromSignatureTemplate( returnType, chosenFunctionName); } @@ -1210,8 +1224,26 @@ core::TypedExprPtr ExpressionFuzzer::generateExpressionFromSignatureTemplate( auto chosenSignature = *chosen->signature; ArgumentTypeFuzzer fuzzer{chosenSignature, returnType, rng_}; - VELOX_CHECK_EQ(fuzzer.fuzzArgumentTypes(options_.maxNumVarArgs), true); - auto& argumentTypes = fuzzer.argumentTypes(); + + std::vector argumentTypes; + if (fuzzer.fuzzArgumentTypes(options_.maxNumVarArgs)) { + // Use the argument fuzzer to generate argument types. + argumentTypes = fuzzer.argumentTypes(); + } else { + auto it = argGenerators_.find(functionName); + // Since the argument type fuzzer cannot produce argument types, argument + // generators should be provided. + VELOX_CHECK( + it != argGenerators_.end(), + "Cannot generate argument types for {} with return type {}.", + functionName, + returnType->toString()); + argumentTypes = it->second->generateArgs(chosenSignature, returnType, rng_); + if (argumentTypes.empty()) { + return nullptr; + } + } + auto constantArguments = chosenSignature.constantArguments(); // ArgumentFuzzer may generate duplicate arguments if the signature's diff --git a/velox/expression/fuzzer/ExpressionFuzzer.h b/velox/expression/fuzzer/ExpressionFuzzer.h index 1e0dc9a74f74..a703ddb7eef9 100644 --- a/velox/expression/fuzzer/ExpressionFuzzer.h +++ b/velox/expression/fuzzer/ExpressionFuzzer.h @@ -19,6 +19,7 @@ #include "velox/core/ITypedExpr.h" #include "velox/core/QueryCtx.h" #include "velox/expression/Expr.h" +#include "velox/expression/fuzzer/ArgGenerator.h" #include "velox/expression/fuzzer/FuzzerToolkit.h" #include "velox/expression/tests/ExpressionVerifier.h" #include "velox/functions/FunctionRegistry.h" @@ -48,6 +49,10 @@ class ExpressionFuzzer { // types. bool enableComplexTypes = false; + // Enable testing of function signatures with decimal argument or return + // types. + bool enableDecimalType = false; + // Enable generation of expressions where one input column can be used by // multiple subexpressions. bool enableColumnReuse = false; @@ -103,7 +108,9 @@ class ExpressionFuzzer { FunctionSignatureMap signatureMap, size_t initialSeed, const std::shared_ptr& vectorFuzzer, - const std::optional& options = std::nullopt); + const std::optional& options = std::nullopt, + const std::unordered_map>& + argGenerators = {}); template void registerFuncOverride(TFunc func, const std::string& name); @@ -416,6 +423,9 @@ class ExpressionFuzzer { } state; friend class ExpressionFuzzerUnitTest; + + // Maps from function name to a specific generator of argument types. + std::unordered_map> argGenerators_; }; } // namespace facebook::velox::fuzzer diff --git a/velox/expression/fuzzer/ExpressionFuzzerTest.cpp b/velox/expression/fuzzer/ExpressionFuzzerTest.cpp index 30dd8dc252d1..90e2c7222681 100644 --- a/velox/expression/fuzzer/ExpressionFuzzerTest.cpp +++ b/velox/expression/fuzzer/ExpressionFuzzerTest.cpp @@ -18,7 +18,14 @@ #include #include +#include "velox/expression/fuzzer/ArgGenerator.h" #include "velox/expression/fuzzer/FuzzerRunner.h" +#include "velox/functions/prestosql/fuzzer/DivideArgGenerator.h" +#include "velox/functions/prestosql/fuzzer/FloorAndRoundArgGenerator.h" +#include "velox/functions/prestosql/fuzzer/ModulusArgGenerator.h" +#include "velox/functions/prestosql/fuzzer/MultiplyArgGenerator.h" +#include "velox/functions/prestosql/fuzzer/PlusMinusArgGenerator.h" +#include "velox/functions/prestosql/fuzzer/TruncateArgGenerator.h" #include "velox/functions/prestosql/registration/RegistrationFunctions.h" DEFINE_int64( @@ -27,6 +34,8 @@ DEFINE_int64( "Initial seed for random number generator used to reproduce previous " "results (0 means start with random seed)."); +using namespace facebook::velox::exec::test; +using facebook::velox::fuzzer::ArgGenerator; using facebook::velox::fuzzer::FuzzerRunner; int main(int argc, char** argv) { @@ -69,5 +78,16 @@ int main(int argc, char** argv) { "regexp_split", }; size_t initialSeed = FLAGS_seed == 0 ? std::time(nullptr) : FLAGS_seed; - return FuzzerRunner::run(initialSeed, skipFunctions, {{}}); + + std::unordered_map> argGenerators = + {{"plus", std::make_shared()}, + {"minus", std::make_shared()}, + {"multiply", std::make_shared()}, + {"divide", std::make_shared()}, + {"floor", std::make_shared()}, + {"round", std::make_shared()}, + {"mod", std::make_shared()}, + {"truncate", std::make_shared()}}; + + return FuzzerRunner::run(initialSeed, skipFunctions, {{}}, argGenerators); } diff --git a/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp b/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp index 1415db5364fe..cbd7979bbf51 100644 --- a/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp +++ b/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp @@ -80,7 +80,9 @@ RowVectorPtr wrapChildren( ExpressionFuzzerVerifier::ExpressionFuzzerVerifier( const FunctionSignatureMap& signatureMap, size_t initialSeed, - const ExpressionFuzzerVerifier::Options& options) + const ExpressionFuzzerVerifier::Options& options, + const std::unordered_map>& + argGenerators) : options_(options), queryCtx_(core::QueryCtx::create( nullptr, @@ -98,7 +100,8 @@ ExpressionFuzzerVerifier::ExpressionFuzzerVerifier( signatureMap, initialSeed, vectorFuzzer_, - options.expressionFuzzerOptions) { + options.expressionFuzzerOptions, + argGenerators) { seed(initialSeed); // Init stats and register listener. diff --git a/velox/expression/fuzzer/ExpressionFuzzerVerifier.h b/velox/expression/fuzzer/ExpressionFuzzerVerifier.h index f651ad554143..be00ddca9bf1 100644 --- a/velox/expression/fuzzer/ExpressionFuzzerVerifier.h +++ b/velox/expression/fuzzer/ExpressionFuzzerVerifier.h @@ -51,7 +51,9 @@ class ExpressionFuzzerVerifier { ExpressionFuzzerVerifier( const FunctionSignatureMap& signatureMap, size_t initialSeed, - const Options& options); + const Options& options, + const std::unordered_map>& + argGenerators); // This function starts the test that is performed by the // ExpressionFuzzerVerifier which is generating random expressions and diff --git a/velox/expression/fuzzer/FuzzerRunner.cpp b/velox/expression/fuzzer/FuzzerRunner.cpp index 56c58a365841..97ef1fc7bd67 100644 --- a/velox/expression/fuzzer/FuzzerRunner.cpp +++ b/velox/expression/fuzzer/FuzzerRunner.cpp @@ -121,6 +121,11 @@ DEFINE_bool( false, "Enable testing of function signatures with complex argument or return types."); +DEFINE_bool( + velox_fuzzer_enable_decimal_type, + false, + "Enable testing of function signatures with decimal argument or return types."); + DEFINE_bool( velox_fuzzer_enable_column_reuse, false, @@ -168,6 +173,7 @@ ExpressionFuzzer::Options getExpressionFuzzerOptions( opts.enableVariadicSignatures = FLAGS_enable_variadic_signatures; opts.enableDereference = FLAGS_enable_dereference; opts.enableComplexTypes = FLAGS_velox_fuzzer_enable_complex_types; + opts.enableDecimalType = FLAGS_velox_fuzzer_enable_decimal_type; opts.enableColumnReuse = FLAGS_velox_fuzzer_enable_column_reuse; opts.enableExpressionReuse = FLAGS_velox_fuzzer_enable_expression_reuse; opts.functionTickets = FLAGS_assign_function_tickets; @@ -204,8 +210,10 @@ ExpressionFuzzerVerifier::Options getExpressionFuzzerVerifierOptions( int FuzzerRunner::run( size_t seed, const std::unordered_set& skipFunctions, - const std::unordered_map& queryConfigs) { - runFromGtest(seed, skipFunctions, queryConfigs); + const std::unordered_map& queryConfigs, + const std::unordered_map>& + argGenerators) { + runFromGtest(seed, skipFunctions, queryConfigs, argGenerators); return RUN_ALL_TESTS(); } @@ -213,13 +221,16 @@ int FuzzerRunner::run( void FuzzerRunner::runFromGtest( size_t seed, const std::unordered_set& skipFunctions, - const std::unordered_map& queryConfigs) { + const std::unordered_map& queryConfigs, + const std::unordered_map>& + argGenerators) { memory::MemoryManager::testingSetInstance({}); auto signatures = facebook::velox::getFunctionSignatures(); ExpressionFuzzerVerifier( signatures, seed, - getExpressionFuzzerVerifierOptions(skipFunctions, queryConfigs)) + getExpressionFuzzerVerifierOptions(skipFunctions, queryConfigs), + argGenerators) .go(); } } // namespace facebook::velox::fuzzer diff --git a/velox/expression/fuzzer/FuzzerRunner.h b/velox/expression/fuzzer/FuzzerRunner.h index 0eda0ecd1d7a..c7c5cda54c65 100644 --- a/velox/expression/fuzzer/FuzzerRunner.h +++ b/velox/expression/fuzzer/FuzzerRunner.h @@ -33,12 +33,16 @@ class FuzzerRunner { static int run( size_t seed, const std::unordered_set& skipFunctions, - const std::unordered_map& queryConfigs); + const std::unordered_map& queryConfigs, + const std::unordered_map>& + argGenerators); static void runFromGtest( size_t seed, const std::unordered_set& skipFunctions, - const std::unordered_map& queryConfigs); + const std::unordered_map& queryConfigs, + const std::unordered_map>& + argGenerators); }; } // namespace facebook::velox::fuzzer diff --git a/velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp b/velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp index ffba105e2e08..ba5abd02bca3 100644 --- a/velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp +++ b/velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp @@ -24,6 +24,14 @@ #include "velox/expression/fuzzer/FuzzerRunner.h" #include "velox/functions/sparksql/Register.h" +#include "velox/functions/sparksql/fuzzer/AddSubtractArgGenerator.h" +#include "velox/functions/sparksql/fuzzer/DivideArgGenerator.h" +#include "velox/functions/sparksql/fuzzer/MakeTimestampArgGenerator.h" +#include "velox/functions/sparksql/fuzzer/MultiplyArgGenerator.h" +#include "velox/functions/sparksql/fuzzer/UnscaledValueArgGenerator.h" + +using namespace facebook::velox::functions::sparksql::fuzzer; +using facebook::velox::fuzzer::ArgGenerator; DEFINE_int64( seed, @@ -58,7 +66,18 @@ int main(int argc, char** argv) { // Required by spark_partition_id function. std::unordered_map queryConfigs = { - {facebook::velox::core::QueryConfig::kSparkPartitionId, "123"}}; + {facebook::velox::core::QueryConfig::kSparkPartitionId, "123"}, + {facebook::velox::core::QueryConfig::kSessionTimezone, + "America/Los_Angeles"}}; + + std::unordered_map> argGenerators = + {{"add", std::make_shared()}, + {"subtract", std::make_shared()}, + {"multiply", std::make_shared()}, + {"divide", std::make_shared()}, + {"unscaled_value", std::make_shared()}, + {"make_timestamp", std::make_shared()}}; - return FuzzerRunner::run(FLAGS_seed, skipFunctions, queryConfigs); + return FuzzerRunner::run( + FLAGS_seed, skipFunctions, queryConfigs, argGenerators); } diff --git a/velox/expression/fuzzer/tests/ArgumentTypeFuzzerTest.cpp b/velox/expression/fuzzer/tests/ArgumentTypeFuzzerTest.cpp index a60d16e24348..ca4ee13b64d5 100644 --- a/velox/expression/fuzzer/tests/ArgumentTypeFuzzerTest.cpp +++ b/velox/expression/fuzzer/tests/ArgumentTypeFuzzerTest.cpp @@ -237,6 +237,21 @@ TEST_F(ArgumentTypeFuzzerTest, unsupported) { .argumentType("decimal(b_precision, b_scale)") .build(); + testFuzzingFailure(signature, ROW({ARRAY(DECIMAL(13, 6))})); + + // Constraints on the argument types are not supported. + signature = exec::FunctionSignatureBuilder() + .integerVariable("a_scale") + .integerVariable("b_scale") + .integerVariable("a_precision", "a_scale + 1") + .integerVariable("b_precision") + .integerVariable("r_precision") + .integerVariable("r_scale") + .returnType("decimal(r_precision, r_scale)") + .argumentType("decimal(a_precision, a_scale)") + .argumentType("decimal(b_precision, b_scale)") + .build(); + testFuzzingFailure(signature, DECIMAL(13, 6)); } @@ -554,6 +569,29 @@ TEST_F(ArgumentTypeFuzzerTest, fuzzDecimalArgumentTypes) { EXPECT_TRUE(argTypes[1]->isDecimal()); EXPECT_EQ(argTypes[0]->toString(), argTypes[2]->toString()); EXPECT_EQ(argTypes[1]->toString(), argTypes[3]->toString()); + + // Decimal argument types are nested in complex types. + signature = exec::FunctionSignatureBuilder() + .integerVariable("a_scale") + .integerVariable("b_scale") + .integerVariable("a_precision") + .integerVariable("b_precision") + .integerVariable("r_precision") + .integerVariable("r_scale") + .returnType("decimal(r_precision, r_scale)") + .argumentType("row(array(decimal(a_precision, a_scale)))") + .argumentType("row(array(decimal(b_precision, b_scale)))") + .build(); + argTypes = fuzzArgumentTypes(*signature, DECIMAL(10, 7)); + ASSERT_EQ(2, argTypes.size()); + EXPECT_TRUE(argTypes[0]->isRow()); + EXPECT_TRUE(argTypes[0]->asRow().childAt(0)->isArray()); + EXPECT_TRUE( + argTypes[0]->asRow().childAt(0)->asArray().elementType()->isDecimal()); + EXPECT_TRUE(argTypes[1]->isRow()); + EXPECT_TRUE(argTypes[1]->asRow().childAt(0)->isArray()); + EXPECT_TRUE( + argTypes[1]->asRow().childAt(0)->asArray().elementType()->isDecimal()); } TEST_F(ArgumentTypeFuzzerTest, fuzzDecimalReturnType) { @@ -643,6 +681,27 @@ TEST_F(ArgumentTypeFuzzerTest, fuzzDecimalReturnType) { returnType = fuzzReturnType(*signature); EXPECT_EQ(DECIMAL(10, 7)->toString(), returnType->toString()); + + // Decimal return type is nested in complex types. + signature = + exec::FunctionSignatureBuilder() + .integerVariable("a_scale") + .integerVariable("b_scale") + .integerVariable("a_precision") + .integerVariable("b_precision") + .returnType( + "row(array(decimal(a_precision, a_scale)), array(decimal(b_precision, b_scale)))") + .argumentType("decimal(a_precision, a_scale)") + .argumentType("decimal(b_precision, b_scale)") + .build(); + returnType = fuzzReturnType(*signature); + EXPECT_TRUE(returnType->isRow()); + EXPECT_TRUE(returnType->asRow().childAt(0)->isArray()); + EXPECT_TRUE(returnType->asRow().childAt(1)->isArray()); + EXPECT_TRUE( + returnType->asRow().childAt(0)->asArray().elementType()->isDecimal()); + EXPECT_TRUE( + returnType->asRow().childAt(1)->asArray().elementType()->isDecimal()); } } // namespace facebook::velox::fuzzer::test diff --git a/velox/functions/sparksql/MakeTimestamp.cpp b/velox/functions/sparksql/MakeTimestamp.cpp index b2904d25540e..89226954fddb 100644 --- a/velox/functions/sparksql/MakeTimestamp.cpp +++ b/velox/functions/sparksql/MakeTimestamp.cpp @@ -157,7 +157,8 @@ class MakeTimestampFunction : public exec::VectorFunction { static std::vector> signatures() { return { exec::FunctionSignatureBuilder() - .integerVariable("precision") + // precision <= 18. + .integerVariable("precision", "min(precision, 18)") .returnType("timestamp") .argumentType("integer") .argumentType("integer") @@ -167,7 +168,8 @@ class MakeTimestampFunction : public exec::VectorFunction { .argumentType("decimal(precision, 6)") .build(), exec::FunctionSignatureBuilder() - .integerVariable("precision") + // precision <= 18. + .integerVariable("precision", "min(precision, 18)") .returnType("timestamp") .argumentType("integer") .argumentType("integer") diff --git a/velox/functions/sparksql/UnscaledValueFunction.cpp b/velox/functions/sparksql/UnscaledValueFunction.cpp index 7833db779d35..3f7858a2d62e 100644 --- a/velox/functions/sparksql/UnscaledValueFunction.cpp +++ b/velox/functions/sparksql/UnscaledValueFunction.cpp @@ -49,7 +49,8 @@ class UnscaledValueFunction final : public exec::VectorFunction { std::vector> unscaledValueSignatures() { return {exec::FunctionSignatureBuilder() - .integerVariable("precision") + // precision <= 18. + .integerVariable("precision", "min(precision, 18)") .integerVariable("scale") .returnType("bigint") .argumentType("DECIMAL(precision, scale)")