From 70e0b27da6e0dcf91d7ab8cba375226d89ba6741 Mon Sep 17 00:00:00 2001 From: rui-mo Date: Fri, 29 Mar 2024 14:55:28 +0800 Subject: [PATCH] refactor --- velox/expression/tests/ArgumentGenerator.h | 36 +++++ velox/expression/tests/CMakeLists.txt | 10 +- velox/expression/tests/ExprTest.cpp | 11 -- velox/expression/tests/ExpressionFuzzer.cpp | 128 ++---------------- velox/expression/tests/ExpressionFuzzer.h | 55 ++++---- .../expression/tests/ExpressionFuzzerTest.cpp | 16 ++- .../tests/ExpressionFuzzerUnitTest.cpp | 4 + .../tests/ExpressionFuzzerVerifier.cpp | 5 +- .../tests/ExpressionFuzzerVerifier.h | 4 +- velox/expression/tests/FuzzerRunner.cpp | 13 +- velox/expression/tests/FuzzerRunner.h | 9 +- .../tests/SparkExpressionFuzzerTest.cpp | 16 ++- .../functions/prestosql/fuzzer/CMakeLists.txt | 3 + .../fuzzer/ExtremeArgumentGenerator.cpp | 60 ++++++++ .../fuzzer/ExtremeArgumentGenerator.h | 36 +++++ .../functions/sparksql/fuzzer/CMakeLists.txt | 5 + .../fuzzer/MakeTimestampArgumentGenerator.cpp | 65 +++++++++ .../fuzzer/MakeTimestampArgumentGenerator.h | 37 +++++ .../fuzzer/UnscaledValueArgumentGenerator.cpp | 46 +++++++ .../fuzzer/UnscaledValueArgumentGenerator.h | 36 +++++ 20 files changed, 424 insertions(+), 171 deletions(-) create mode 100644 velox/expression/tests/ArgumentGenerator.h create mode 100644 velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.cpp create mode 100644 velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.h create mode 100644 velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.cpp create mode 100644 velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.h create mode 100644 velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.cpp create mode 100644 velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.h diff --git a/velox/expression/tests/ArgumentGenerator.h b/velox/expression/tests/ArgumentGenerator.h new file mode 100644 index 0000000000000..46ee4d9b58d16 --- /dev/null +++ 
b/velox/expression/tests/ArgumentGenerator.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/core/ITypedExpr.h" +#include "velox/expression/tests/utils/FuzzerToolkit.h" + +namespace facebook::velox::test { + +class ExpressionFuzzer; + +class ArgumentGenerator { + public: + virtual ~ArgumentGenerator() = default; + + // Generates function arguments of the specified signature. 
+ virtual std::vector generate( + ExpressionFuzzer* expressionFuzzer, + const CallableSignature& input, + int32_t maxNumVarArgs) = 0; +}; + +} // namespace facebook::velox::test diff --git a/velox/expression/tests/CMakeLists.txt b/velox/expression/tests/CMakeLists.txt index 6958e22f9276d..a7c0491e4255b 100644 --- a/velox/expression/tests/CMakeLists.txt +++ b/velox/expression/tests/CMakeLists.txt @@ -153,10 +153,12 @@ target_link_libraries( add_executable(velox_expression_fuzzer_test ExpressionFuzzerTest.cpp) -target_link_libraries(velox_expression_fuzzer_test velox_expression_fuzzer - velox_functions_prestosql gtest gtest_main) +target_link_libraries( + velox_expression_fuzzer_test velox_expression_fuzzer_utility + velox_expression_fuzzer velox_functions_prestosql gtest gtest_main) add_executable(spark_expression_fuzzer_test SparkExpressionFuzzerTest.cpp) -target_link_libraries(spark_expression_fuzzer_test velox_expression_fuzzer - velox_functions_spark gtest gtest_main) +target_link_libraries( + spark_expression_fuzzer_test spark_expression_fuzzer_utility + velox_expression_fuzzer velox_functions_spark gtest gtest_main) diff --git a/velox/expression/tests/ExprTest.cpp b/velox/expression/tests/ExprTest.cpp index d55c55b91e55c..f052526225224 100644 --- a/velox/expression/tests/ExprTest.cpp +++ b/velox/expression/tests/ExprTest.cpp @@ -2615,17 +2615,6 @@ TEST_P(ParameterizedExprTest, constantToSql) { ASSERT_EQ(toSql(2134456LL), "'2134456'::BIGINT"); ASSERT_EQ(toSql(variant::null(TypeKind::BIGINT)), "NULL::BIGINT"); - ASSERT_EQ(toSql(2134456LL, DECIMAL(18, 2)), "'21344.56'::DECIMAL(18, 2)"); - ASSERT_EQ( - toSql(variant::null(TypeKind::BIGINT), DECIMAL(18, 2)), - "NULL::DECIMAL(18, 2)"); - ASSERT_EQ( - toSql((int128_t)1'000'000'000'000'000'000, DECIMAL(38, 2)), - "'10000000000000000.00'::DECIMAL(38, 2)"); - ASSERT_EQ( - toSql(variant::null(TypeKind::HUGEINT), DECIMAL(38, 2)), - "NULL::DECIMAL(38, 2)"); - ASSERT_EQ(toSql(18'506, DATE()), "'2020-09-01'::DATE"); 
ASSERT_EQ(toSql(variant::null(TypeKind::INTEGER), DATE()), "NULL::DATE"); diff --git a/velox/expression/tests/ExpressionFuzzer.cpp b/velox/expression/tests/ExpressionFuzzer.cpp index 98f4c703e2911..6fa316b7dd78f 100644 --- a/velox/expression/tests/ExpressionFuzzer.cpp +++ b/velox/expression/tests/ExpressionFuzzer.cpp @@ -531,10 +531,13 @@ ExpressionFuzzer::ExpressionFuzzer( FunctionSignatureMap signatureMap, size_t initialSeed, const std::shared_ptr& vectorFuzzer, + const std::unordered_map>& + customArgumentGenerators, const std::optional& options) : options_(options.value_or(Options())), vectorFuzzer_(vectorFuzzer), - state{rng_, std::max(1, options_.maxLevelOfNesting)} { + state{rng_, std::max(1, options_.maxLevelOfNesting)}, + customArgumentGenerators_(customArgumentGenerators) { VELOX_CHECK(vectorFuzzer, "Vector fuzzer must be provided"); seed(initialSeed); @@ -711,13 +714,6 @@ ExpressionFuzzer::ExpressionFuzzer( // Register function override (for cases where we want to restrict the types // or parameters we pass to functions). registerFuncOverride(&ExpressionFuzzer::generateSwitchArgs, "switch"); - registerFuncOverride( - &ExpressionFuzzer::generateExtremeFunctionArgs, "greatest"); - registerFuncOverride(&ExpressionFuzzer::generateExtremeFunctionArgs, "least"); - registerFuncOverride( - &ExpressionFuzzer::generateMakeTimestampArgs, "make_timestamp"); - registerFuncOverride( - &ExpressionFuzzer::generateUnscaledValueArgs, "unscaled_value"); } void ExpressionFuzzer::getTicketsForFunctions() { @@ -950,84 +946,6 @@ core::TypedExprPtr ExpressionFuzzer::generateArg( } } -std::vector ExpressionFuzzer::generateExtremeFunctionArgs( - const CallableSignature& input) { - const auto argTypes = input.args; - VELOX_CHECK_GE( - argTypes.size(), - 1, - "At least one input is expected from the template signature."); - if (!argTypes[0]->isDecimal()) { - return generateArgs(input); - } - - auto numVarArgs = - !input.variableArity ? 
0 : rand32(0, options_.maxNumVarArgs); - std::vector inputExpressions; - inputExpressions.reserve(argTypes.size() + numVarArgs); - inputExpressions.emplace_back( - generateArg(argTypes.at(0), input.constantArgs.at(0))); - - // Append varargs to the argument list. - for (int i = 0; i < numVarArgs; i++) { - core::TypedExprPtr argExpr; - // The varargs need to be generated following the result type of the first - // argument. But when nested expression is generated, that cannot be - // guaranteed as argument precisions and scales cannot be inferred from the - // result type through a decimal function signature. Given this limitation, - // generate constant or column only. - const auto argType = inputExpressions[0]->type(); - if (rand32(0, 1) == kArgConstant) { - argExpr = generateArgConstant(argType); - } else { - argExpr = generateArgColumn(argType); - } - inputExpressions.emplace_back(argExpr); - } - return inputExpressions; -} - -std::vector ExpressionFuzzer::generateMakeTimestampArgs( - const CallableSignature& input) { - VELOX_CHECK_GE( - input.args.size(), - 6, - "At least six inputs are expected from the template signature."); - bool useTimezone = vectorFuzzer_->coinToss(0.5); - std::vector inputExpressions; - inputExpressions.reserve(6); - for (int index = 0; index < 5; ++index) { - inputExpressions.emplace_back(generateArg(input.args[index])); - } - - // The required result type of the sixth argument is a short decimal type with - // scale being 6. But when nested expression is generated, that cannot be - // guaranteed as argument precisions and scales cannot be inferred from the - // result type through a decimal function signature. Given this limitation, - // generate constant or column only. - core::TypedExprPtr argExpr; - if (rand32(0, 1) == kArgConstant) { - argExpr = generateArgConstant(input.args[5]); - } else { - argExpr = generateArgColumn(input.args[5]); - } - inputExpressions.emplace_back(argExpr); - - if (input.args.size() == 7) { - // The 7th. 
argument cannot be randomly generated as it should be a valid - // timezone string. - std::vector timezoneSet = { - "Asia/Kolkata", - "America/Los_Angeles", - "Canada/Atlantic", - "+08:00", - "-10:00"}; - inputExpressions.emplace_back(std::make_shared( - VARCHAR(), variant(timezoneSet[rand32(0, 4)]))); - } - return inputExpressions; -} - std::vector ExpressionFuzzer::generateSwitchArgs( const CallableSignature& input) { VELOX_CHECK_EQ( @@ -1050,29 +968,6 @@ std::vector ExpressionFuzzer::generateSwitchArgs( return inputExpressions; } -std::vector ExpressionFuzzer::generateUnscaledValueArgs( - const CallableSignature& input) { - VELOX_CHECK_EQ( - input.args.size(), - 1, - "Only one input is expected from the template signature."); - - // The required result type of input argument is a short decimal type. But - // when nested expression is generated, that cannot be guaranteed as argument - // precisions and scales cannot be inferred from the result type through a - // decimal function signature. Given this limitation, generate constant or - // column only. 
- std::vector inputExpressions; - core::TypedExprPtr argExpr; - if (rand32(0, 1) == kArgConstant) { - argExpr = generateArgConstant(input.args[0]); - } else { - argExpr = generateArgColumn(input.args[0]); - } - inputExpressions.emplace_back(argExpr); - return inputExpressions; -} - ExpressionFuzzer::FuzzedExpressionData ExpressionFuzzer::fuzzExpressions( const RowTypePtr& outType) { state.reset(); @@ -1167,6 +1062,10 @@ core::TypedExprPtr ExpressionFuzzer::generateExpression( std::vector ExpressionFuzzer::getArgsForCallable( const CallableSignature& callable) { + if (customArgumentGenerators_.count(callable.name)) { + return customArgumentGenerators_[callable.name]->generate( + this, callable, options_.maxNumVarArgs); + } auto funcIt = funcArgOverrides_.find(callable.name); if (funcIt == funcArgOverrides_.end()) { return generateArgs(callable); @@ -1180,7 +1079,6 @@ TypePtr ExpressionFuzzer::getConstrainedOutputType( if (signature == nullptr) { return nullptr; } - // Checks if any variable is integer constrained, and get the decimal name // style. bool integerConstrained = false; @@ -1259,10 +1157,10 @@ core::TypedExprPtr ExpressionFuzzer::getCallExprFromCallable( // For a decimal function (especially a nested one), as argument precisions // and scales are randomly generated, callable.returnType does not follow the // required constraints, and the matched result type needs to be recalculated - // from the argument types. If a constrained output type can be generated, use - // it to avoid breaking the constraints between input types and output types. - // Otherwise, generate a CallTypedExpr with type because callable.returnType - // may not have the required field names. + // from the argument types. If function signature is provided, generates a + // constrained type to avoid breaking the constraints between input types and + // output types. Otherwise, generate a CallTypedExpr with type because + // callable.returnType may not have the required field names. 
const auto constrainedType = getConstrainedOutputType(args, signature); return std::make_shared( constrainedType ? constrainedType : type, args, callable.name); @@ -1347,7 +1245,7 @@ core::TypedExprPtr ExpressionFuzzer::generateExpressionFromConcreteSignatures( } markSelected(chosen->name); - return getCallExprFromCallable(*chosen, returnType, nullptr); + return getCallExprFromCallable(*chosen, returnType); } const SignatureTemplate* ExpressionFuzzer::chooseRandomSignatureTemplate( diff --git a/velox/expression/tests/ExpressionFuzzer.h b/velox/expression/tests/ExpressionFuzzer.h index 84eb919c3a279..ce235737a9014 100644 --- a/velox/expression/tests/ExpressionFuzzer.h +++ b/velox/expression/tests/ExpressionFuzzer.h @@ -19,6 +19,7 @@ #include "velox/core/ITypedExpr.h" #include "velox/core/QueryCtx.h" #include "velox/expression/Expr.h" +#include "velox/expression/tests/ArgumentGenerator.h" #include "velox/expression/tests/ExpressionVerifier.h" #include "velox/expression/tests/utils/FuzzerToolkit.h" #include "velox/functions/FunctionRegistry.h" @@ -107,6 +108,8 @@ class ExpressionFuzzer { FunctionSignatureMap signatureMap, size_t initialSeed, const std::shared_ptr& vectorFuzzer, + const std::unordered_map>& + customArgumentGenerators, const std::optional& options = std::nullopt); template @@ -195,6 +198,19 @@ class ExpressionFuzzer { RowTypePtr fuzzRowReturnType(size_t size, char prefix = 'p'); + core::TypedExprPtr generateArg(const TypePtr& arg); + + core::TypedExprPtr generateArg(const TypePtr& arg, bool isConstant); + + std::vector generateArgs(const CallableSignature& input); + + core::TypedExprPtr generateArgColumn(const TypePtr& arg); + + core::TypedExprPtr generateArgConstant(const TypePtr& arg); + + // Returns random integer between min and max inclusive. 
+ int32_t rand32(int32_t min, int32_t max); + private: // Either generates a new expression of the required return type or if // already generated expressions of the same return type exist then there is @@ -218,12 +234,6 @@ class ExpressionFuzzer { void appendConjunctSignatures(); - core::TypedExprPtr generateArgConstant(const TypePtr& arg); - - core::TypedExprPtr generateArgColumn(const TypePtr& arg); - - core::TypedExprPtr generateArg(const TypePtr& arg); - // Given lambda argument type, generate matching LambdaTypedExpr. // // The 'arg' specifies inputs types and result type for the lambda. This @@ -234,24 +244,14 @@ class ExpressionFuzzer { // all input. The constant value is generated using 'generateArgConstant'. core::TypedExprPtr generateArgFunction(const TypePtr& arg); - std::vector generateArgs(const CallableSignature& input); - std::vector generateArgs( const std::vector& argTypes, const std::vector& constantArgs, uint32_t numVarArgs = 0); - core::TypedExprPtr generateArg(const TypePtr& arg, bool isConstant); - - /// Specialization for the "greatest" and "least" functions: decimal varargs - /// need to be constant or column. - std::vector generateExtremeFunctionArgs( - const CallableSignature& input); - - /// Specialization for the "make_timestamp" function: 1) decimal argument - /// needs to be constant or column. 2) timezone argument needs to be valid. - std::vector generateMakeTimestampArgs( - const CallableSignature& input); + // Return a vector of expressions for each argument of callable in order. + std::vector getArgsForCallable( + const CallableSignature& callable); /// Specialization for the "switch" function. Takes in a signature that is /// of the form Switch (condition, then): boolean, T -> T where the type @@ -262,15 +262,6 @@ class ExpressionFuzzer { std::vector generateSwitchArgs( const CallableSignature& input); - /// Specialization for the "unscaled_value" function: decimal argument needs - /// to be constant or column. 
- std::vector generateUnscaledValueArgs( - const CallableSignature& input); - - // Return a vector of expressions for each argument of callable in order. - std::vector getArgsForCallable( - const CallableSignature& callable); - /// Given the argument types, calculates the return type of a decimal function /// by evaluating constraints. TypePtr getConstrainedOutputType( @@ -352,9 +343,6 @@ class ExpressionFuzzer { state.expressionStats_[funcName]++; } - // Returns random integer between min and max inclusive. - int32_t rand32(int32_t min, int32_t max); - static const inline std::string kTypeParameterName = "T"; const Options options_; @@ -441,6 +429,11 @@ class ExpressionFuzzer { int32_t remainingLevelOfNesting_; } state; + + // Maps from function name to a custom arguments generator. + std::unordered_map> + customArgumentGenerators_; + friend class ExpressionFuzzerUnitTest; }; diff --git a/velox/expression/tests/ExpressionFuzzerTest.cpp b/velox/expression/tests/ExpressionFuzzerTest.cpp index 1c6a675bfdf73..68cddccf86a00 100644 --- a/velox/expression/tests/ExpressionFuzzerTest.cpp +++ b/velox/expression/tests/ExpressionFuzzerTest.cpp @@ -19,6 +19,7 @@ #include #include "velox/expression/tests/FuzzerRunner.h" +#include "velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.h" #include "velox/functions/prestosql/registration/RegistrationFunctions.h" DEFINE_int64( @@ -65,6 +66,19 @@ int main(int argc, char** argv) { "regexp_extract_all", "regexp_like", }; + + const std::unordered_map< + std::string, + std::shared_ptr> + customArgumentGenerators = { + {"greatest", + std::make_shared< + facebook::velox::functions::test::ExtremeArgumentGenerator>()}, + {"least", + std::make_shared< + facebook::velox::functions::test::ExtremeArgumentGenerator>()}}; + size_t initialSeed = FLAGS_seed == 0 ? 
std::time(nullptr) : FLAGS_seed; - return FuzzerRunner::run(initialSeed, skipFunctions, {{}}); + return FuzzerRunner::run( + initialSeed, skipFunctions, {{}}, customArgumentGenerators); } diff --git a/velox/expression/tests/ExpressionFuzzerUnitTest.cpp b/velox/expression/tests/ExpressionFuzzerUnitTest.cpp index 9fd3a09b3a047..043e45a35fd03 100644 --- a/velox/expression/tests/ExpressionFuzzerUnitTest.cpp +++ b/velox/expression/tests/ExpressionFuzzerUnitTest.cpp @@ -77,6 +77,7 @@ TEST_F(ExpressionFuzzerUnitTest, restrictedLevelOfNesting) { velox::getFunctionSignatures(), 0, vectorfuzzer, + {}, makeOptionsWithMaxLevelNesting(maxLevelOfNesting), }; @@ -116,6 +117,7 @@ TEST_F(ExpressionFuzzerUnitTest, reproduceExpressionWithSeed) { velox::getFunctionSignatures(), 1234567, vectorfuzzer, + {}, makeOptionsWithMaxLevelNesting(5)}; for (auto i = 0; i < 10; ++i) { firstGeneration.push_back( @@ -142,6 +144,7 @@ TEST_F(ExpressionFuzzerUnitTest, exprBank) { velox::getFunctionSignatures(), 0, vectorfuzzer, + {}, makeOptionsWithMaxLevelNesting(maxLevelOfNesting)}; ExpressionFuzzer::ExprBank exprBank(seed, maxLevelOfNesting); for (int i = 0; i < 5000; ++i) { @@ -170,6 +173,7 @@ TEST_F(ExpressionFuzzerUnitTest, exprBank) { velox::getFunctionSignatures(), 0, vectorfuzzer, + {}, makeOptionsWithMaxLevelNesting(maxLevelOfNesting)}; ExpressionFuzzer::ExprBank exprBank(seed, maxLevelOfNesting); for (int i = 0; i < 1000; ++i) { diff --git a/velox/expression/tests/ExpressionFuzzerVerifier.cpp b/velox/expression/tests/ExpressionFuzzerVerifier.cpp index 8e36fd739f21c..94ce099a72f41 100644 --- a/velox/expression/tests/ExpressionFuzzerVerifier.cpp +++ b/velox/expression/tests/ExpressionFuzzerVerifier.cpp @@ -80,7 +80,9 @@ RowVectorPtr wrapChildren( ExpressionFuzzerVerifier::ExpressionFuzzerVerifier( const FunctionSignatureMap& signatureMap, size_t initialSeed, - const ExpressionFuzzerVerifier::Options& options) + const ExpressionFuzzerVerifier::Options& options, + const std::unordered_map>& + 
customArgumentGenerators) : options_(options), queryCtx_(std::make_shared( nullptr, @@ -98,6 +100,7 @@ ExpressionFuzzerVerifier::ExpressionFuzzerVerifier( signatureMap, initialSeed, vectorFuzzer_, + customArgumentGenerators, options.expressionFuzzerOptions) { seed(initialSeed); diff --git a/velox/expression/tests/ExpressionFuzzerVerifier.h b/velox/expression/tests/ExpressionFuzzerVerifier.h index 2f85b5d52bc71..a9e9b6392fa16 100644 --- a/velox/expression/tests/ExpressionFuzzerVerifier.h +++ b/velox/expression/tests/ExpressionFuzzerVerifier.h @@ -51,7 +51,9 @@ class ExpressionFuzzerVerifier { ExpressionFuzzerVerifier( const FunctionSignatureMap& signatureMap, size_t initialSeed, - const Options& options); + const Options& options, + const std::unordered_map>& + customArgumentGenerators); // This function starts the test that is performed by the // ExpressionFuzzerVerifier which is generating random expressions and diff --git a/velox/expression/tests/FuzzerRunner.cpp b/velox/expression/tests/FuzzerRunner.cpp index e947f147c6853..c05333f8f7f93 100644 --- a/velox/expression/tests/FuzzerRunner.cpp +++ b/velox/expression/tests/FuzzerRunner.cpp @@ -210,8 +210,10 @@ ExpressionFuzzerVerifier::Options getExpressionFuzzerVerifierOptions( int FuzzerRunner::run( size_t seed, const std::unordered_set& skipFunctions, - const std::unordered_map& queryConfigs) { - runFromGtest(seed, skipFunctions, queryConfigs); + const std::unordered_map& queryConfigs, + const std::unordered_map>& + customArgumentGenerators) { + runFromGtest(seed, skipFunctions, queryConfigs, customArgumentGenerators); return RUN_ALL_TESTS(); } @@ -219,13 +221,16 @@ int FuzzerRunner::run( void FuzzerRunner::runFromGtest( size_t seed, const std::unordered_set& skipFunctions, - const std::unordered_map& queryConfigs) { + const std::unordered_map& queryConfigs, + const std::unordered_map>& + customArgumentGenerators) { memory::MemoryManager::testingSetInstance({}); auto signatures = 
facebook::velox::getFunctionSignatures(); ExpressionFuzzerVerifier( signatures, seed, - getExpressionFuzzerVerifierOptions(skipFunctions, queryConfigs)) + getExpressionFuzzerVerifierOptions(skipFunctions, queryConfigs), + customArgumentGenerators) .go(); } } // namespace facebook::velox::test diff --git a/velox/expression/tests/FuzzerRunner.h b/velox/expression/tests/FuzzerRunner.h index cbf3d5ac290a9..efb09b262b461 100644 --- a/velox/expression/tests/FuzzerRunner.h +++ b/velox/expression/tests/FuzzerRunner.h @@ -22,6 +22,7 @@ #include #include +#include "velox/expression/tests/ArgumentGenerator.h" #include "velox/expression/tests/ExpressionFuzzerVerifier.h" #include "velox/functions/FunctionRegistry.h" @@ -33,12 +34,16 @@ class FuzzerRunner { static int run( size_t seed, const std::unordered_set& skipFunctions, - const std::unordered_map& queryConfigs); + const std::unordered_map& queryConfigs, + const std::unordered_map>& + customArgumentGenerators); static void runFromGtest( size_t seed, const std::unordered_set& skipFunctions, - const std::unordered_map& queryConfigs); + const std::unordered_map& queryConfigs, + const std::unordered_map>& + customArgumentGenerators); }; } // namespace facebook::velox::test diff --git a/velox/expression/tests/SparkExpressionFuzzerTest.cpp b/velox/expression/tests/SparkExpressionFuzzerTest.cpp index 1232c49ed12ab..68e34c81d129c 100644 --- a/velox/expression/tests/SparkExpressionFuzzerTest.cpp +++ b/velox/expression/tests/SparkExpressionFuzzerTest.cpp @@ -24,6 +24,8 @@ #include "velox/expression/tests/FuzzerRunner.h" #include "velox/functions/sparksql/Register.h" +#include "velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.h" +#include "velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.h" DEFINE_int64( seed, @@ -62,5 +64,17 @@ int main(int argc, char** argv) { {facebook::velox::core::QueryConfig::kSessionTimezone, "America/Los_Angeles"}}; - return FuzzerRunner::run(FLAGS_seed, skipFunctions, 
queryConfigs); + const std::unordered_map< + std::string, + std::shared_ptr> + customArgumentGenerators = { + {"unscaled_value", + std::make_shared()}, + {"make_timestamp", + std::make_shared()}}; + + return FuzzerRunner::run( + FLAGS_seed, skipFunctions, queryConfigs, customArgumentGenerators); } diff --git a/velox/functions/prestosql/fuzzer/CMakeLists.txt b/velox/functions/prestosql/fuzzer/CMakeLists.txt index 392d206c17222..e64e7731e3cc1 100644 --- a/velox/functions/prestosql/fuzzer/CMakeLists.txt +++ b/velox/functions/prestosql/fuzzer/CMakeLists.txt @@ -37,3 +37,6 @@ target_link_libraries( velox_functions_prestosql gtest gtest_main) + +add_library(velox_expression_fuzzer_utility ExtremeArgumentGenerator.cpp) +target_link_libraries(velox_expression_fuzzer_utility fmt::fmt) \ No newline at end of file diff --git a/velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.cpp b/velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.cpp new file mode 100644 index 0000000000000..eafcf28db6e4f --- /dev/null +++ b/velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.h" +#include "velox/expression/tests/ExpressionFuzzer.h" + +namespace facebook::velox::functions::test { + +std::vector ExtremeArgumentGenerator::generate( + ::facebook::velox::test::ExpressionFuzzer* expressionFuzzer, + const ::facebook::velox::test::CallableSignature& input, + int32_t maxNumVarArgs) { + const auto argTypes = input.args; + VELOX_CHECK_GE( + argTypes.size(), + 1, + "At least one input is expected from the template signature."); + if (!argTypes[0]->isDecimal()) { + return expressionFuzzer->generateArgs(input); + } + + auto numVarArgs = + !input.variableArity ? 0 : expressionFuzzer->rand32(0, maxNumVarArgs); + std::vector inputExpressions; + inputExpressions.reserve(argTypes.size() + numVarArgs); + inputExpressions.emplace_back( + expressionFuzzer->generateArg(argTypes.at(0), input.constantArgs.at(0))); + + // Append varargs to the argument list. + for (int i = 0; i < numVarArgs; i++) { + core::TypedExprPtr argExpr; + // The varargs need to be generated following the result type of the first + // argument. But when nested expression is generated, that cannot be + // guaranteed as argument precisions and scales cannot be inferred from the + // result type through a decimal function signature. Given this limitation, + // generate constant or column only. 
+ const auto argType = inputExpressions[0]->type(); + if (expressionFuzzer->rand32(0, 1) == 0) { + argExpr = expressionFuzzer->generateArgConstant(argType); + } else { + argExpr = expressionFuzzer->generateArgColumn(argType); + } + inputExpressions.emplace_back(argExpr); + } + return inputExpressions; +} + +} // namespace facebook::velox::functions::test diff --git a/velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.h b/velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.h new file mode 100644 index 0000000000000..f4293898f7068 --- /dev/null +++ b/velox/functions/prestosql/fuzzer/ExtremeArgumentGenerator.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/expression/tests/ArgumentGenerator.h" + +namespace facebook::velox::functions::test { + +namespace facebook::velox::test { +class ExpressionFuzzer; +} + +/// Generates custom arguments for the "greatest" and "least" functions: decimal +/// varargs need to be constant or column. 
+class ExtremeArgumentGenerator : public velox::test::ArgumentGenerator { + public: + std::vector generate( + ::facebook::velox::test::ExpressionFuzzer* expressionFuzzer, + const ::facebook::velox::test::CallableSignature& input, + int32_t maxNumVarArgs) override; +}; + +} // namespace facebook::velox::functions::test diff --git a/velox/functions/sparksql/fuzzer/CMakeLists.txt b/velox/functions/sparksql/fuzzer/CMakeLists.txt index 62f8eb1cda8c5..5074c1c78e9f9 100644 --- a/velox/functions/sparksql/fuzzer/CMakeLists.txt +++ b/velox/functions/sparksql/fuzzer/CMakeLists.txt @@ -24,3 +24,8 @@ target_link_libraries( velox_vector_test_lib gtest gtest_main) + +add_library(spark_expression_fuzzer_utility MakeTimestampArgumentGenerator.cpp + UnscaledValueArgumentGenerator.cpp) + +target_link_libraries(spark_expression_fuzzer_utility fmt::fmt) \ No newline at end of file diff --git a/velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.cpp b/velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.cpp new file mode 100644 index 0000000000000..ab88eeea51794 --- /dev/null +++ b/velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.h" +#include "velox/expression/tests/ExpressionFuzzer.h" + +namespace facebook::velox::functions::sparksql::test { + +std::vector MakeTimestampArgumentGenerator::generate( + ::facebook::velox::test::ExpressionFuzzer* expressionFuzzer, + const ::facebook::velox::test::CallableSignature& input, + int32_t maxNumVarArgs) { + VELOX_CHECK_GE( + input.args.size(), + 6, + "At least six inputs are expected from the template signature."); + bool useTimezone = expressionFuzzer->rand32(0, 1); + std::vector inputExpressions; + inputExpressions.reserve(6); + for (int index = 0; index < 5; ++index) { + inputExpressions.emplace_back( + expressionFuzzer->generateArg(input.args[index])); + } + + // The required result type of the sixth argument is a short decimal type with + // scale being 6. But when nested expression is generated, that cannot be + // guaranteed as argument precisions and scales cannot be inferred from the + // result type through a decimal function signature. Given this limitation, + // generate constant or column only. + core::TypedExprPtr argExpr; + if (expressionFuzzer->rand32(0, 1) == 0) { + argExpr = expressionFuzzer->generateArgConstant(input.args[5]); + } else { + argExpr = expressionFuzzer->generateArgColumn(input.args[5]); + } + inputExpressions.emplace_back(argExpr); + + if (input.args.size() == 7) { + // The 7th. argument cannot be randomly generated as it should be a valid + // timezone string. 
+ std::vector timezoneSet = { + "Asia/Kolkata", + "America/Los_Angeles", + "Canada/Atlantic", + "+08:00", + "-10:00"}; + inputExpressions.emplace_back(std::make_shared( + VARCHAR(), variant(timezoneSet[expressionFuzzer->rand32(0, 4)]))); + } + return inputExpressions; +} + +} // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.h b/velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.h new file mode 100644 index 0000000000000..c601817abe187 --- /dev/null +++ b/velox/functions/sparksql/fuzzer/MakeTimestampArgumentGenerator.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/expression/tests/ArgumentGenerator.h" + +namespace facebook::velox::functions::sparksql::test { + +namespace facebook::velox::test { +class ExpressionFuzzer; +} + +/// Generates custom arguments for the "make_timestamp" function: 1) decimal +/// argument needs to be constant or column. 2) timezone argument needs to be +/// valid. 
+class MakeTimestampArgumentGenerator : public velox::test::ArgumentGenerator {
+ public:
+  std::vector<core::TypedExprPtr> generate(
+      ::facebook::velox::test::ExpressionFuzzer* expressionFuzzer,
+      const ::facebook::velox::test::CallableSignature& input,
+      int32_t maxNumVarArgs) override;
+};
+
+} // namespace facebook::velox::functions::sparksql::test
diff --git a/velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.cpp b/velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.cpp
new file mode 100644
index 0000000000000..97e767cdd6710
--- /dev/null
+++ b/velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.h"
+#include "velox/expression/tests/ExpressionFuzzer.h"
+
+namespace facebook::velox::functions::sparksql::test {
+
+std::vector<core::TypedExprPtr> UnscaledValueArgumentGenerator::generate(
+    ::facebook::velox::test::ExpressionFuzzer* expressionFuzzer,
+    const ::facebook::velox::test::CallableSignature& input,
+    int32_t /*maxNumVarArgs*/) {
+  VELOX_CHECK_EQ(
+      input.args.size(),
+      1,
+      "Only one input is expected from the template signature.");
+
+  // The required result type of input argument is a short decimal type. But
+  // when nested expression is generated, that cannot be guaranteed as argument
+  // precisions and scales cannot be inferred from the result type through a
+  // decimal function signature. Given this limitation, generate constant or
+  // column only.
+  std::vector<core::TypedExprPtr> inputExpressions;
+  core::TypedExprPtr argExpr;
+  if (expressionFuzzer->rand32(0, 1) == 0) {
+    argExpr = expressionFuzzer->generateArgConstant(input.args[0]);
+  } else {
+    argExpr = expressionFuzzer->generateArgColumn(input.args[0]);
+  }
+  inputExpressions.emplace_back(argExpr);
+  return inputExpressions;
+}
+
+} // namespace facebook::velox::functions::sparksql::test
diff --git a/velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.h b/velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.h
new file mode 100644
index 0000000000000..7e99d3976e0cc
--- /dev/null
+++ b/velox/functions/sparksql/fuzzer/UnscaledValueArgumentGenerator.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include "velox/expression/tests/ArgumentGenerator.h"
+
+namespace facebook::velox::test {
+class ExpressionFuzzer;
+} // namespace facebook::velox::test
+
+namespace facebook::velox::functions::sparksql::test {
+
+/// Generates custom arguments for the "unscaled_value" function: decimal
+/// argument needs to be constant or column.
+class UnscaledValueArgumentGenerator : public velox::test::ArgumentGenerator {
+ public:
+  std::vector<core::TypedExprPtr> generate(
+      ::facebook::velox::test::ExpressionFuzzer* expressionFuzzer,
+      const ::facebook::velox::test::CallableSignature& input,
+      int32_t maxNumVarArgs) override;
+};
+
+} // namespace facebook::velox::functions::sparksql::test