diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9a84ab2aeed2..6c8d4f75ca51 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -206,11 +206,11 @@ with a benchmark. ``` # Test the new function in isolation. Use --only flag to restrict the set of functions # and run for 60 seconds or longer. - velox_expression_fuzzer_test --only --duration_sec 60 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse + velox_expression_fuzzer_test --only --duration_sec 60 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --velox_fuzzer_enable_decimal_type --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse # Test the new function in combination with other functions. Do not restrict the set # of functions and run for 10 minutes (600 seconds) or longer. - velox_expression_fuzzer_test --duration_sec 600 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse + velox_expression_fuzzer_test --duration_sec 600 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --velox_fuzzer_enable_decimal_type --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse ``` Here are example PRs: diff --git a/velox/docs/develop/testing/fuzzer.rst b/velox/docs/develop/testing/fuzzer.rst index 639acda6ee5e..97dd226698a8 100644 --- a/velox/docs/develop/testing/fuzzer.rst +++ b/velox/docs/develop/testing/fuzzer.rst @@ -228,6 +228,8 @@ Below are arguments that toggle certain fuzzer features in Expression Fuzzer: * ``--velox_fuzzer_enable_complex_types``: Enable testing of function signatures with complex argument or return types. Default is false. +* ``--velox_fuzzer_enable_decimal_type``: Enable testing of function signatures with decimal argument or return type. Default is false. + * ``--lazy_vector_generation_ratio``: Specifies the probability with which columns in the input row vector will be selected to be wrapped in lazy encoding (expressed as double from 0 to 1). Default is 0.0. * ``--velox_fuzzer_enable_column_reuse``: Enable generation of expressions where one input column can be used by multiple subexpressions. Default is false. diff --git a/velox/expression/ReverseSignatureBinder.cpp b/velox/expression/ReverseSignatureBinder.cpp index 495920e6bf1f..274d2a06480f 100644 --- a/velox/expression/ReverseSignatureBinder.cpp +++ b/velox/expression/ReverseSignatureBinder.cpp @@ -18,27 +18,7 @@ namespace facebook::velox::exec { -bool ReverseSignatureBinder::hasConstrainedIntegerVariable( - const TypeSignature& type) const { - if (type.parameters().empty()) { - auto it = variables().find(type.baseName()); - return it != variables().end() && it->second.isIntegerParameter() && - it->second.constraint() != ""; - } - - const auto& parameters = type.parameters(); - for (const auto& parameter : parameters) { - if (hasConstrainedIntegerVariable(parameter)) { - return true; - } - } - return false; -} - bool ReverseSignatureBinder::tryBind() { - if (hasConstrainedIntegerVariable(signature_.returnType())) { - return false; - } tryBindSucceeded_ = SignatureBinderBase::tryBind(signature_.returnType(), returnType_); return tryBindSucceeded_; diff --git a/velox/expression/ReverseSignatureBinder.h b/velox/expression/ReverseSignatureBinder.h index 087adc4dbe77..3da800a5edbb 100644 --- a/velox/expression/ReverseSignatureBinder.h +++ b/velox/expression/ReverseSignatureBinder.h @@ -54,10 +54,6 @@ class ReverseSignatureBinder : private SignatureBinderBase { } private: - // Return whether there is a constraint on an integer variable in type - // signature. - bool hasConstrainedIntegerVariable(const TypeSignature& type) const; - const TypePtr returnType_; // True if 'tryBind' has been called and succeeded. False otherwise. diff --git a/velox/expression/fuzzer/ArgumentTypeFuzzer.cpp b/velox/expression/fuzzer/ArgumentTypeFuzzer.cpp index 1d7b784f2990..4e26e67a17a0 100644 --- a/velox/expression/fuzzer/ArgumentTypeFuzzer.cpp +++ b/velox/expression/fuzzer/ArgumentTypeFuzzer.cpp @@ -58,70 +58,43 @@ bool isPositiveInteger(const std::string& str) { void ArgumentTypeFuzzer::determineUnboundedIntegerVariables( const exec::TypeSignature& type) { - if (!isDecimalBaseName(type.baseName())) { + std::vector decimalTypes; + findDecimalTypes(type, decimalTypes); + if (decimalTypes.empty()) { return; } + for (const auto& decimalType : decimalTypes) { + auto [p, s] = tryBindFixedPrecisionScale(decimalType); - VELOX_CHECK_EQ(2, type.parameters().size()) - - const auto& precision = type.parameters()[0].baseName(); - const auto& scale = type.parameters()[1].baseName(); - - // Bind 'name' variable, if not already bound, using 'constant' constraint - // ('name'='123'). Return bound value if 'name' is already bound or was - // successfully bound to a constant value. Return std::nullopt otherwise. - auto tryFixedBinding = [&](const auto& name) -> std::optional { - auto it = variables().find(name); - if (it == variables().end()) { - VELOX_CHECK( - isPositiveInteger(name), - "Precision and scale of a decimal type must refer to a variable " - "or specify a position integer constant: {}", - name) - return std::stoi(name); - } - - if (integerBindings_.count(name) > 0) { - return integerBindings_[name]; + if (p.has_value() && s.has_value()) { + return; } - if (isPositiveInteger(it->second.constraint())) { - const auto value = std::stoi(it->second.constraint()); - integerBindings_[name] = value; - return value; - } - - return std::nullopt; - }; - - std::optional p = tryFixedBinding(precision); - std::optional s = tryFixedBinding(scale); + const auto& precision = decimalType.parameters()[0].baseName(); + const auto& scale = decimalType.parameters()[1].baseName(); - if (p.has_value() && s.has_value()) { - return; - } + if (s.has_value()) { + p = std::max(ShortDecimalType::kMinPrecision, s.value()); + if (p < LongDecimalType::kMaxPrecision) { + p = p.value() + rand32(0, LongDecimalType::kMaxPrecision - p.value()); + } - if (s.has_value()) { - p = std::max(ShortDecimalType::kMinPrecision, s.value()); - if (p < LongDecimalType::kMaxPrecision) { - p = p.value() + rand32(0, LongDecimalType::kMaxPrecision - p.value()); + integerBindings_[precision] = p.value(); + return; } - integerBindings_[precision] = p.value(); - return; - } + if (p.has_value()) { + s = rand32(0, p.value()); + integerBindings_[scale] = s.value(); + return; + } - if (p.has_value()) { + p = rand32(1, LongDecimalType::kMaxPrecision); s = rand32(0, p.value()); + + integerBindings_[precision] = p.value(); integerBindings_[scale] = s.value(); - return; } - - p = rand32(1, LongDecimalType::kMaxPrecision); - s = rand32(0, p.value()); - - integerBindings_[precision] = p.value(); - integerBindings_[scale] = s.value(); } void ArgumentTypeFuzzer::determineUnboundedTypeVariables() { @@ -154,24 +127,55 @@ TypePtr ArgumentTypeFuzzer::randOrderableType() { } bool ArgumentTypeFuzzer::fuzzArgumentTypes(uint32_t maxVariadicArgs) { - const auto& formalArgs = signature_.argumentTypes(); - auto formalArgsCnt = formalArgs.size(); - - if (returnType_) { + if (returnType_ == nullptr) { + for (const auto& [name, _] : signature_.variables()) { + bindings_.insert({name, nullptr}); + } + } else { exec::ReverseSignatureBinder binder{signature_, returnType_}; if (!binder.tryBind()) { return false; } + + auto types = signature_.argumentTypes(); + types.emplace_back(signature_.returnType()); + for (const auto& type : types) { + std::vector decimalTypes; + findDecimalTypes(type, decimalTypes); + if (decimalTypes.empty()) { + continue; + } + + // Verify if the precision and scale variables of argument types and + // return type can be bound to constant values. If not and extra + // constraint is provided, argument types cannot be generated without + // following these constraints. + for (const auto& decimalType : decimalTypes) { + const auto [p, s] = tryBindFixedPrecisionScale(decimalType); + + const auto& precision = decimalType.parameters()[0].baseName(); + const auto& scale = decimalType.parameters()[1].baseName(); + + const auto hasConstraint = [&](const std::string& name) { + return !variables().at(name).constraint().empty(); + }; + + if ((!p.has_value() && hasConstraint(precision)) || + (!s.has_value() && hasConstraint(scale))) { + return false; + } + } + } + bindings_ = binder.bindings(); integerBindings_ = binder.integerBindings(); - } else { - for (const auto& [name, _] : signature_.variables()) { - bindings_.insert({name, nullptr}); - } } + const auto& formalArgs = signature_.argumentTypes(); + auto formalArgsCnt = formalArgs.size(); + determineUnboundedTypeVariables(); - for (const auto& argType : signature_.argumentTypes()) { + for (const auto& argType : formalArgs) { determineUnboundedIntegerVariables(argType); } for (auto i = 0; i < formalArgsCnt; i++) { @@ -226,4 +230,54 @@ int32_t ArgumentTypeFuzzer::rand32(int32_t min, int32_t max) { return boost::random::uniform_int_distribution(min, max)(rng_); } +std::optional ArgumentTypeFuzzer::tryFixedBinding( + const std::string& name) { + auto it = variables().find(name); + if (it == variables().end()) { + VELOX_CHECK( + isPositiveInteger(name), + "Precision and scale of a decimal type must refer to a variable " + "or specify a positive integer constant: {}", + name) + return std::stoi(name); + } + + if (integerBindings_.count(name) > 0) { + return integerBindings_[name]; + } + + if (isPositiveInteger(it->second.constraint())) { + const auto value = std::stoi(it->second.constraint()); + integerBindings_[name] = value; + return value; + } + + return std::nullopt; +} + +std::pair, std::optional> +ArgumentTypeFuzzer::tryBindFixedPrecisionScale( + const exec::TypeSignature& type) { + VELOX_CHECK(isDecimalBaseName(type.baseName())); + VELOX_CHECK_EQ(2, type.parameters().size()) + + const auto& precisionName = type.parameters()[0].baseName(); + const auto& scaleName = type.parameters()[1].baseName(); + + std::optional p = tryFixedBinding(precisionName); + std::optional s = tryFixedBinding(scaleName); + return {p, s}; +} + +void ArgumentTypeFuzzer::findDecimalTypes( + const exec::TypeSignature& type, + std::vector& decimalTypes) const { + if (isDecimalBaseName(type.baseName())) { + decimalTypes.push_back(type); + } + for (const auto& param : type.parameters()) { + findDecimalTypes(param, decimalTypes); + } +} + } // namespace facebook::velox::fuzzer diff --git a/velox/expression/fuzzer/ArgumentTypeFuzzer.h b/velox/expression/fuzzer/ArgumentTypeFuzzer.h index 9a01ef9c5e4a..08c3375df9f8 100644 --- a/velox/expression/fuzzer/ArgumentTypeFuzzer.h +++ b/velox/expression/fuzzer/ArgumentTypeFuzzer.h @@ -82,6 +82,22 @@ class ArgumentTypeFuzzer { /// Generates an orderable random type, including structs, and arrays. TypePtr randOrderableType(); + // Bind 'name' variable, if not already bound, using 'constant' constraint + // ('name'='123'). Return bound value if 'name' is already bound or was + // successfully bound to a constant value. Return std::nullopt otherwise. + std::optional tryFixedBinding(const std::string& name); + + // Bind the precision and scale variables in a decimal type signature to + // constant values. Return std::nullopt if the variable cannot be bound to a + // constant value. + std::pair, std::optional> tryBindFixedPrecisionScale( + const exec::TypeSignature& type); + + // Find all the nested decimal type signatures recursively. + void findDecimalTypes( + const exec::TypeSignature& type, + std::vector& decimalTypes) const; + const exec::FunctionSignature& signature_; TypePtr returnType_; diff --git a/velox/expression/fuzzer/ExpressionFuzzer.cpp b/velox/expression/fuzzer/ExpressionFuzzer.cpp index e981a8c0f36f..4e0be5d2fc18 100644 --- a/velox/expression/fuzzer/ExpressionFuzzer.cpp +++ b/velox/expression/fuzzer/ExpressionFuzzer.cpp @@ -440,16 +440,20 @@ bool useTypeName( bool isSupportedSignature( const exec::FunctionSignature& signature, - bool enableComplexType) { - // Not supporting lambda functions, or functions using decimal and - // timestamp with time zone types. + bool enableComplexType, + bool enableDecimalType) { + // When enableComplexType is disabled, not supporting complex functions. + const bool useComplexType = useTypeName(signature, "array") || + useTypeName(signature, "map") || useTypeName(signature, "row"); + // When enableDecimalType is disabled, not supporting decimal functions. Not + // supporting functions using custom types, timestamp with time zone types and + // interval day to second types. return !( useTypeName(signature, "opaque") || - useTypeName(signature, "long_decimal") || - useTypeName(signature, "short_decimal") || - useTypeName(signature, "decimal") || useTypeName(signature, "timestamp with time zone") || useTypeName(signature, "interval day to second") || + (!enableDecimalType && useTypeName(signature, "decimal")) || + (!enableComplexType && useComplexType) || (enableComplexType && useTypeName(signature, "unknown"))); } @@ -531,10 +535,13 @@ ExpressionFuzzer::ExpressionFuzzer( FunctionSignatureMap signatureMap, size_t initialSeed, const std::shared_ptr& vectorFuzzer, - const std::optional& options) + const std::optional& options, + const std::unordered_map>& + argGenerators) : options_(options.value_or(Options())), vectorFuzzer_(vectorFuzzer), - state{rng_, std::max(1, options_.maxLevelOfNesting)} { + state{rng_, std::max(1, options_.maxLevelOfNesting)}, + argGenerators_(argGenerators) { VELOX_CHECK(vectorFuzzer, "Vector fuzzer must be provided"); seed(initialSeed); @@ -558,10 +565,14 @@ ExpressionFuzzer::ExpressionFuzzer( for (const auto& signature : function.second) { ++totalFunctionSignatures; - if (!isSupportedSignature(*signature, options_.enableComplexTypes)) { + if (!isSupportedSignature( + *signature, + options_.enableComplexTypes, + options_.enableDecimalType)) { continue; } - if (!(signature->variables().empty() || options_.enableComplexTypes)) { + if (!(signature->variables().empty() || options_.enableComplexTypes || + options_.enableDecimalType)) { LOG(WARNING) << "Skipping unsupported signature: " << function.first << signature->toString(); continue; @@ -669,8 +680,10 @@ ExpressionFuzzer::ExpressionFuzzer( sortSignatureTemplates(signatureTemplates_); for (const auto& it : signatureTemplates_) { - auto& returnType = it.signature->returnType().baseName(); - auto* returnTypeKey = &returnType; + const auto returnType = + exec::sanitizeName(it.signature->returnType().baseName()); + const auto* returnTypeKey = &returnType; + if (it.typeVariables.find(returnType) != it.typeVariables.end()) { // Return type is a template variable. returnTypeKey = &kTypeParameterName; @@ -744,7 +757,7 @@ void ExpressionFuzzer::addToTypeToExpressionListByTicketTimes( const std::string& funcName) { int tickets = getTickets(funcName); for (int i = 0; i < tickets; i++) { - typeToExpressionList_[type].push_back(funcName); + typeToExpressionList_[exec::sanitizeName(type)].push_back(funcName); } } @@ -1018,7 +1031,8 @@ core::TypedExprPtr ExpressionFuzzer::generateExpression( } else { expression = generateExpressionFromConcreteSignatures( returnType, chosenFunctionName); - if (!expression && options_.enableComplexTypes) { + if (!expression && + (options_.enableComplexTypes || options_.enableDecimalType)) { expression = generateExpressionFromSignatureTemplate( returnType, chosenFunctionName); } @@ -1213,8 +1227,26 @@ core::TypedExprPtr ExpressionFuzzer::generateExpressionFromSignatureTemplate( auto chosenSignature = *chosen->signature; ArgumentTypeFuzzer fuzzer{chosenSignature, returnType, rng_}; - VELOX_CHECK_EQ(fuzzer.fuzzArgumentTypes(options_.maxNumVarArgs), true); - auto& argumentTypes = fuzzer.argumentTypes(); + + std::vector argumentTypes; + if (fuzzer.fuzzArgumentTypes(options_.maxNumVarArgs)) { + // Use the argument fuzzer to generate argument types. + argumentTypes = fuzzer.argumentTypes(); + } else { + auto it = argGenerators_.find(functionName); + // Since the argument type fuzzer cannot produce argument types, argument + // generators should be provided. + VELOX_CHECK( + it != argGenerators_.end(), + "Cannot generate argument types for {} with return type {}.", + functionName, + returnType->toString()); + argumentTypes = it->second->generateArgs(chosenSignature, returnType, rng_); + if (argumentTypes.empty()) { + return nullptr; + } + } + auto constantArguments = chosenSignature.constantArguments(); // ArgumentFuzzer may generate duplicate arguments if the signature's diff --git a/velox/expression/fuzzer/ExpressionFuzzer.h b/velox/expression/fuzzer/ExpressionFuzzer.h index 1e0dc9a74f74..a703ddb7eef9 100644 --- a/velox/expression/fuzzer/ExpressionFuzzer.h +++ b/velox/expression/fuzzer/ExpressionFuzzer.h @@ -19,6 +19,7 @@ #include "velox/core/ITypedExpr.h" #include "velox/core/QueryCtx.h" #include "velox/expression/Expr.h" +#include "velox/expression/fuzzer/ArgGenerator.h" #include "velox/expression/fuzzer/FuzzerToolkit.h" #include "velox/expression/tests/ExpressionVerifier.h" #include "velox/functions/FunctionRegistry.h" @@ -48,6 +49,10 @@ class ExpressionFuzzer { // types. bool enableComplexTypes = false; + // Enable testing of function signatures with decimal argument or return + // types. + bool enableDecimalType = false; + // Enable generation of expressions where one input column can be used by // multiple subexpressions. bool enableColumnReuse = false; @@ -103,7 +108,9 @@ class ExpressionFuzzer { FunctionSignatureMap signatureMap, size_t initialSeed, const std::shared_ptr& vectorFuzzer, - const std::optional& options = std::nullopt); + const std::optional& options = std::nullopt, + const std::unordered_map>& + argGenerators = {}); template void registerFuncOverride(TFunc func, const std::string& name); @@ -416,6 +423,9 @@ class ExpressionFuzzer { } state; friend class ExpressionFuzzerUnitTest; + + // Maps from function name to a specific generator of argument types. + std::unordered_map> argGenerators_; }; } // namespace facebook::velox::fuzzer diff --git a/velox/expression/fuzzer/ExpressionFuzzerTest.cpp b/velox/expression/fuzzer/ExpressionFuzzerTest.cpp index 30dd8dc252d1..90e2c7222681 100644 --- a/velox/expression/fuzzer/ExpressionFuzzerTest.cpp +++ b/velox/expression/fuzzer/ExpressionFuzzerTest.cpp @@ -18,7 +18,14 @@ #include #include +#include "velox/expression/fuzzer/ArgGenerator.h" #include "velox/expression/fuzzer/FuzzerRunner.h" +#include "velox/functions/prestosql/fuzzer/DivideArgGenerator.h" +#include "velox/functions/prestosql/fuzzer/FloorAndRoundArgGenerator.h" +#include "velox/functions/prestosql/fuzzer/ModulusArgGenerator.h" +#include "velox/functions/prestosql/fuzzer/MultiplyArgGenerator.h" +#include "velox/functions/prestosql/fuzzer/PlusMinusArgGenerator.h" +#include "velox/functions/prestosql/fuzzer/TruncateArgGenerator.h" #include "velox/functions/prestosql/registration/RegistrationFunctions.h" DEFINE_int64( @@ -27,6 +34,8 @@ DEFINE_int64( "Initial seed for random number generator used to reproduce previous " "results (0 means start with random seed)."); +using namespace facebook::velox::exec::test; +using facebook::velox::fuzzer::ArgGenerator; using facebook::velox::fuzzer::FuzzerRunner; int main(int argc, char** argv) { @@ -69,5 +78,16 @@ int main(int argc, char** argv) { "regexp_split", }; size_t initialSeed = FLAGS_seed == 0 ? std::time(nullptr) : FLAGS_seed; - return FuzzerRunner::run(initialSeed, skipFunctions, {{}}); + + std::unordered_map> argGenerators = + {{"plus", std::make_shared()}, + {"minus", std::make_shared()}, + {"multiply", std::make_shared()}, + {"divide", std::make_shared()}, + {"floor", std::make_shared()}, + {"round", std::make_shared()}, + {"mod", std::make_shared()}, + {"truncate", std::make_shared()}}; + + return FuzzerRunner::run(initialSeed, skipFunctions, {{}}, argGenerators); } diff --git a/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp b/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp index 1415db5364fe..cbd7979bbf51 100644 --- a/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp +++ b/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp @@ -80,7 +80,9 @@ RowVectorPtr wrapChildren( ExpressionFuzzerVerifier::ExpressionFuzzerVerifier( const FunctionSignatureMap& signatureMap, size_t initialSeed, - const ExpressionFuzzerVerifier::Options& options) + const ExpressionFuzzerVerifier::Options& options, + const std::unordered_map>& + argGenerators) : options_(options), queryCtx_(core::QueryCtx::create( nullptr, @@ -98,7 +100,8 @@ ExpressionFuzzerVerifier::ExpressionFuzzerVerifier( signatureMap, initialSeed, vectorFuzzer_, - options.expressionFuzzerOptions) { + options.expressionFuzzerOptions, + argGenerators) { seed(initialSeed); // Init stats and register listener. diff --git a/velox/expression/fuzzer/ExpressionFuzzerVerifier.h b/velox/expression/fuzzer/ExpressionFuzzerVerifier.h index f651ad554143..be00ddca9bf1 100644 --- a/velox/expression/fuzzer/ExpressionFuzzerVerifier.h +++ b/velox/expression/fuzzer/ExpressionFuzzerVerifier.h @@ -51,7 +51,9 @@ class ExpressionFuzzerVerifier { ExpressionFuzzerVerifier( const FunctionSignatureMap& signatureMap, size_t initialSeed, - const Options& options); + const Options& options, + const std::unordered_map>& + argGenerators); // This function starts the test that is performed by the // ExpressionFuzzerVerifier which is generating random expressions and diff --git a/velox/expression/fuzzer/FuzzerRunner.cpp b/velox/expression/fuzzer/FuzzerRunner.cpp index 56c58a365841..97ef1fc7bd67 100644 --- a/velox/expression/fuzzer/FuzzerRunner.cpp +++ b/velox/expression/fuzzer/FuzzerRunner.cpp @@ -121,6 +121,11 @@ DEFINE_bool( false, "Enable testing of function signatures with complex argument or return types."); +DEFINE_bool( + velox_fuzzer_enable_decimal_type, + false, + "Enable testing of function signatures with decimal argument or return types."); + DEFINE_bool( velox_fuzzer_enable_column_reuse, false, @@ -168,6 +173,7 @@ ExpressionFuzzer::Options getExpressionFuzzerOptions( opts.enableVariadicSignatures = FLAGS_enable_variadic_signatures; opts.enableDereference = FLAGS_enable_dereference; opts.enableComplexTypes = FLAGS_velox_fuzzer_enable_complex_types; + opts.enableDecimalType = FLAGS_velox_fuzzer_enable_decimal_type; opts.enableColumnReuse = FLAGS_velox_fuzzer_enable_column_reuse; opts.enableExpressionReuse = FLAGS_velox_fuzzer_enable_expression_reuse; opts.functionTickets = FLAGS_assign_function_tickets; @@ -204,8 +210,10 @@ ExpressionFuzzerVerifier::Options getExpressionFuzzerVerifierOptions( int FuzzerRunner::run( size_t seed, const std::unordered_set& skipFunctions, - const std::unordered_map& queryConfigs) { - runFromGtest(seed, skipFunctions, queryConfigs); + const std::unordered_map& queryConfigs, + const std::unordered_map>& + argGenerators) { + runFromGtest(seed, skipFunctions, queryConfigs, argGenerators); return RUN_ALL_TESTS(); } @@ -213,13 +221,16 @@ int FuzzerRunner::run( void FuzzerRunner::runFromGtest( size_t seed, const std::unordered_set& skipFunctions, - const std::unordered_map& queryConfigs) { + const std::unordered_map& queryConfigs, + const std::unordered_map>& + argGenerators) { memory::MemoryManager::testingSetInstance({}); auto signatures = facebook::velox::getFunctionSignatures(); ExpressionFuzzerVerifier( signatures, seed, - getExpressionFuzzerVerifierOptions(skipFunctions, queryConfigs)) + getExpressionFuzzerVerifierOptions(skipFunctions, queryConfigs), + argGenerators) .go(); } } // namespace facebook::velox::fuzzer diff --git a/velox/expression/fuzzer/FuzzerRunner.h b/velox/expression/fuzzer/FuzzerRunner.h index 0eda0ecd1d7a..c7c5cda54c65 100644 --- a/velox/expression/fuzzer/FuzzerRunner.h +++ b/velox/expression/fuzzer/FuzzerRunner.h @@ -33,12 +33,16 @@ class FuzzerRunner { static int run( size_t seed, const std::unordered_set& skipFunctions, - const std::unordered_map& queryConfigs); + const std::unordered_map& queryConfigs, + const std::unordered_map>& + argGenerators); static void runFromGtest( size_t seed, const std::unordered_set& skipFunctions, - const std::unordered_map& queryConfigs); + const std::unordered_map& queryConfigs, + const std::unordered_map>& + argGenerators); }; } // namespace facebook::velox::fuzzer diff --git a/velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp b/velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp index ffba105e2e08..ba5abd02bca3 100644 --- a/velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp +++ b/velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp @@ -24,6 +24,14 @@ #include "velox/expression/fuzzer/FuzzerRunner.h" #include "velox/functions/sparksql/Register.h" +#include "velox/functions/sparksql/fuzzer/AddSubtractArgGenerator.h" +#include "velox/functions/sparksql/fuzzer/DivideArgGenerator.h" +#include "velox/functions/sparksql/fuzzer/MakeTimestampArgGenerator.h" +#include "velox/functions/sparksql/fuzzer/MultiplyArgGenerator.h" +#include "velox/functions/sparksql/fuzzer/UnscaledValueArgGenerator.h" + +using namespace facebook::velox::functions::sparksql::fuzzer; +using facebook::velox::fuzzer::ArgGenerator; DEFINE_int64( seed, @@ -58,7 +66,18 @@ int main(int argc, char** argv) { // Required by spark_partition_id function. std::unordered_map queryConfigs = { - {facebook::velox::core::QueryConfig::kSparkPartitionId, "123"}}; + {facebook::velox::core::QueryConfig::kSparkPartitionId, "123"}, + {facebook::velox::core::QueryConfig::kSessionTimezone, + "America/Los_Angeles"}}; + + std::unordered_map> argGenerators = + {{"add", std::make_shared()}, + {"subtract", std::make_shared()}, + {"multiply", std::make_shared()}, + {"divide", std::make_shared()}, + {"unscaled_value", std::make_shared()}, + {"make_timestamp", std::make_shared()}}; - return FuzzerRunner::run(FLAGS_seed, skipFunctions, queryConfigs); + return FuzzerRunner::run( + FLAGS_seed, skipFunctions, queryConfigs, argGenerators); } diff --git a/velox/expression/fuzzer/tests/ArgumentTypeFuzzerTest.cpp b/velox/expression/fuzzer/tests/ArgumentTypeFuzzerTest.cpp index 32971df22e0d..e6da1023b215 100644 --- a/velox/expression/fuzzer/tests/ArgumentTypeFuzzerTest.cpp +++ b/velox/expression/fuzzer/tests/ArgumentTypeFuzzerTest.cpp @@ -239,6 +239,21 @@ TEST_F(ArgumentTypeFuzzerTest, unsupported) { .argumentType("decimal(b_precision, b_scale)") .build(); + testFuzzingFailure(signature, ROW({ARRAY(DECIMAL(13, 6))})); + + // Constraints on the argument types are not supported. + signature = exec::FunctionSignatureBuilder() + .integerVariable("a_scale") + .integerVariable("b_scale") + .integerVariable("a_precision", "a_scale + 1") + .integerVariable("b_precision") + .integerVariable("r_precision") + .integerVariable("r_scale") + .returnType("decimal(r_precision, r_scale)") + .argumentType("decimal(a_precision, a_scale)") + .argumentType("decimal(b_precision, b_scale)") + .build(); + testFuzzingFailure(signature, DECIMAL(13, 6)); } @@ -557,6 +572,29 @@ TEST_F(ArgumentTypeFuzzerTest, fuzzDecimalArgumentTypes) { EXPECT_TRUE(argTypes[1]->isDecimal()); EXPECT_EQ(argTypes[0]->toString(), argTypes[2]->toString()); EXPECT_EQ(argTypes[1]->toString(), argTypes[3]->toString()); + + // Decimal argument types are nested in complex types. + signature = exec::FunctionSignatureBuilder() + .integerVariable("a_scale") + .integerVariable("b_scale") + .integerVariable("a_precision") + .integerVariable("b_precision") + .integerVariable("r_precision") + .integerVariable("r_scale") + .returnType("decimal(r_precision, r_scale)") + .argumentType("row(array(decimal(a_precision, a_scale)))") + .argumentType("row(array(decimal(b_precision, b_scale)))") + .build(); + argTypes = fuzzArgumentTypes(*signature, DECIMAL(10, 7)); + ASSERT_EQ(2, argTypes.size()); + EXPECT_TRUE(argTypes[0]->isRow()); + EXPECT_TRUE(argTypes[0]->asRow().childAt(0)->isArray()); + EXPECT_TRUE( + argTypes[0]->asRow().childAt(0)->asArray().elementType()->isDecimal()); + EXPECT_TRUE(argTypes[1]->isRow()); + EXPECT_TRUE(argTypes[1]->asRow().childAt(0)->isArray()); + EXPECT_TRUE( + argTypes[1]->asRow().childAt(0)->asArray().elementType()->isDecimal()); } TEST_F(ArgumentTypeFuzzerTest, fuzzDecimalReturnType) { @@ -647,6 +685,27 @@ TEST_F(ArgumentTypeFuzzerTest, fuzzDecimalReturnType) { returnType = fuzzReturnType(*signature); EXPECT_EQ(DECIMAL(10, 7)->toString(), returnType->toString()); + + // Decimal return type is nested in complex types. + signature = + exec::FunctionSignatureBuilder() + .integerVariable("a_scale") + .integerVariable("b_scale") + .integerVariable("a_precision") + .integerVariable("b_precision") + .returnType( + "row(array(decimal(a_precision, a_scale)), array(decimal(b_precision, b_scale)))") + .argumentType("decimal(a_precision, a_scale)") + .argumentType("decimal(b_precision, b_scale)") + .build(); + returnType = fuzzReturnType(*signature); + EXPECT_TRUE(returnType->isRow()); + EXPECT_TRUE(returnType->asRow().childAt(0)->isArray()); + EXPECT_TRUE(returnType->asRow().childAt(1)->isArray()); + EXPECT_TRUE( + returnType->asRow().childAt(0)->asArray().elementType()->isDecimal()); + EXPECT_TRUE( + returnType->asRow().childAt(1)->asArray().elementType()->isDecimal()); } } // namespace facebook::velox::fuzzer::test diff --git a/velox/functions/sparksql/MakeTimestamp.cpp b/velox/functions/sparksql/MakeTimestamp.cpp index b2904d25540e..89226954fddb 100644 --- a/velox/functions/sparksql/MakeTimestamp.cpp +++ b/velox/functions/sparksql/MakeTimestamp.cpp @@ -157,7 +157,8 @@ class MakeTimestampFunction : public exec::VectorFunction { static std::vector> signatures() { return { exec::FunctionSignatureBuilder() - .integerVariable("precision") + // precision <= 18. + .integerVariable("precision", "min(precision, 18)") .returnType("timestamp") .argumentType("integer") .argumentType("integer") @@ -167,7 +168,8 @@ class MakeTimestampFunction : public exec::VectorFunction { .argumentType("decimal(precision, 6)") .build(), exec::FunctionSignatureBuilder() - .integerVariable("precision") + // precision <= 18. + .integerVariable("precision", "min(precision, 18)") .returnType("timestamp") .argumentType("integer") .argumentType("integer") diff --git a/velox/functions/sparksql/UnscaledValueFunction.cpp b/velox/functions/sparksql/UnscaledValueFunction.cpp index 7833db779d35..3f7858a2d62e 100644 --- a/velox/functions/sparksql/UnscaledValueFunction.cpp +++ b/velox/functions/sparksql/UnscaledValueFunction.cpp @@ -49,7 +49,8 @@ class UnscaledValueFunction final : public exec::VectorFunction { std::vector> unscaledValueSignatures() { return {exec::FunctionSignatureBuilder() - .integerVariable("precision") + // precision <= 18. + .integerVariable("precision", "min(precision, 18)") .integerVariable("scale") .returnType("bigint") .argumentType("DECIMAL(precision, scale)")