Skip to content

Commit

Permalink
Support expression transformers in expression fuzzer (#11264)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #11264

Some Velox functions return Array-typed results with a different order
of elements than Presto, such as array_intersect, array_except, etc.
This mismatch of element order is cosmetic because the order is not
defined. To tolerate this mismatch in fuzzer, this diff introduces an
SortArrayTransformer that wraps these array functions in array_sort
when generating the random expressions.

There is a limitation with the current SortArrayTransformer that it
doesn't support arrays that contain maps because map is not an
orderable type. In this situation, SortArrayTransformer transforms the
expression into a NULL constant. This can be extended later to apply
`array_sort(a, (x, y) -> if(json_format(cast(x as json)) < json_format(cast(y as json)), 1, if(json_format(cast(x as json)) = json_format(cast(y as json)), 0, -1)))`
if the array contains map.

Reviewed By: bikramSingh91

Differential Revision: D64358222

fbshipit-source-id: 12b35def70b1fb8297499a09324ad3292b3052c5
  • Loading branch information
kagamiori authored and facebook-github-bot committed Oct 29, 2024
1 parent 6440a44 commit e67f11b
Show file tree
Hide file tree
Showing 8 changed files with 175 additions and 19 deletions.
35 changes: 35 additions & 0 deletions velox/exec/fuzzer/ExprTransformer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include "velox/core/Expressions.h"

namespace facebook::velox::exec::test {

/// Interface for rewriting an expression produced by the expression fuzzer.
///
/// The expression fuzzer keeps a map from function name to transformer in its
/// options. When the randomly chosen function name has a registered
/// transformer, the fuzzer passes the generated expression through
/// transform() and uses the returned expression instead.
class ExprTransformer {
public:
// Virtual destructor so that implementations can be destroyed through a
// pointer to this interface.
virtual ~ExprTransformer() = default;

/// Transforms the given expression into a new expression. This should be
/// called during the expression generation in expression fuzzer.
///
/// The fuzzer only calls this with a non-null expression; it skips the
/// transformation when no expression could be generated.
virtual core::TypedExprPtr transform(core::TypedExprPtr) const = 0;

/// Returns the additional number of levels of nesting introduced by the
/// transformation.
///
/// The fuzzer subtracts this value from its remaining nesting budget before
/// generating the expression to be transformed, and adds it back afterwards,
/// so the transformed expression stays within the configured nesting limit.
virtual int32_t extraLevelOfNesting() const = 0;
};

} // namespace facebook::velox::exec::test
46 changes: 31 additions & 15 deletions velox/expression/fuzzer/ExpressionFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1004,23 +1004,39 @@ core::TypedExprPtr ExpressionFuzzer::generateExpression(
chosenFunctionName = templateList[chosenExprIndex];
}

if (chosenFunctionName == "cast") {
expression = generateCastExpression(returnType);
} else if (chosenFunctionName == "row_constructor") {
// Avoid generating deeply nested types that are rarely used in practice.
if (levelOfNesting(returnType) < 3) {
expression = generateRowConstructorExpression(returnType);
}
} else if (chosenFunctionName == "dereference") {
expression = generateDereferenceExpression(returnType);
} else {
expression = generateExpressionFromConcreteSignatures(
returnType, chosenFunctionName);
if (!expression &&
(options_.enableComplexTypes || options_.enableDecimalType)) {
expression = generateExpressionFromSignatureTemplate(
auto exprTransformer = options_.exprTransformers.find(chosenFunctionName);
if (exprTransformer != options_.exprTransformers.end()) {
state.remainingLevelOfNesting_ -=
exprTransformer->second->extraLevelOfNesting();
}

if (state.remainingLevelOfNesting_ >= 0) {
if (chosenFunctionName == "cast") {
expression = generateCastExpression(returnType);
} else if (chosenFunctionName == "row_constructor") {
// Avoid generating deeply nested types that are rarely used in practice.
if (levelOfNesting(returnType) < 3) {
expression = generateRowConstructorExpression(returnType);
}
} else if (chosenFunctionName == "dereference") {
expression = generateDereferenceExpression(returnType);
} else {
expression = generateExpressionFromConcreteSignatures(
returnType, chosenFunctionName);
if (!expression &&
(options_.enableComplexTypes || options_.enableDecimalType)) {
expression = generateExpressionFromSignatureTemplate(
returnType, chosenFunctionName);
}
}
}

if (exprTransformer != options_.exprTransformers.end()) {
if (expression) {
expression = exprTransformer->second->transform(std::move(expression));
}
state.remainingLevelOfNesting_ +=
exprTransformer->second->extraLevelOfNesting();
}
}
if (!expression) {
Expand Down
5 changes: 5 additions & 0 deletions velox/expression/fuzzer/ExpressionFuzzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include "velox/core/ITypedExpr.h"
#include "velox/core/QueryCtx.h"
#include "velox/exec/fuzzer/ExprTransformer.h"
#include "velox/exec/fuzzer/ReferenceQueryRunner.h"
#include "velox/expression/Expr.h"
#include "velox/expression/fuzzer/ArgGenerator.h"
Expand All @@ -30,6 +31,7 @@
namespace facebook::velox::fuzzer {

using exec::test::ReferenceQueryRunner;
using facebook::velox::exec::test::ExprTransformer;

// A tool that can be used to generate random expressions.
class ExpressionFuzzer {
Expand Down Expand Up @@ -101,6 +103,9 @@ class ExpressionFuzzer {
// "array_sort(array(T),constant function(T,T,bigint)) -> array(T)"}
std::unordered_set<std::string> skipFunctions;

std::unordered_map<std::string, std::shared_ptr<ExprTransformer>>
exprTransformers;

// When set, when the input size of the generated expressions reaches
// maxInputsThreshold, fuzzing input columns will reuse one of the existing
// columns if any is already generated with the same type.
Expand Down
9 changes: 9 additions & 0 deletions velox/expression/fuzzer/ExpressionFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "velox/functions/prestosql/fuzzer/ModulusArgGenerator.h"
#include "velox/functions/prestosql/fuzzer/MultiplyArgGenerator.h"
#include "velox/functions/prestosql/fuzzer/PlusMinusArgGenerator.h"
#include "velox/functions/prestosql/fuzzer/SortArrayTransformer.h"
#include "velox/functions/prestosql/fuzzer/TruncateArgGenerator.h"
#include "velox/functions/prestosql/registration/RegistrationFunctions.h"

Expand Down Expand Up @@ -113,6 +114,13 @@ int main(int argc, char** argv) {
{"mod", std::make_shared<ModulusArgGenerator>()},
{"truncate", std::make_shared<TruncateArgGenerator>()}};

std::unordered_map<std::string, std::shared_ptr<ExprTransformer>>
exprTransformers = {
{"array_intersect", std::make_shared<SortArrayTransformer>()},
{"array_except", std::make_shared<SortArrayTransformer>()},
{"map_keys", std::make_shared<SortArrayTransformer>()},
{"map_values", std::make_shared<SortArrayTransformer>()}};

std::shared_ptr<facebook::velox::memory::MemoryPool> rootPool{
facebook::velox::memory::memoryManager()->addRootPool()};
std::shared_ptr<ReferenceQueryRunner> referenceQueryRunner{nullptr};
Expand All @@ -127,6 +135,7 @@ int main(int argc, char** argv) {
FuzzerRunner::runFromGtest(
initialSeed,
skipFunctions,
exprTransformers,
{{"session_timezone", "America/Los_Angeles"},
{"adjust_timestamp_to_session_timezone", "true"}},
argGenerators,
Expand Down
22 changes: 18 additions & 4 deletions velox/expression/fuzzer/FuzzerRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ VectorFuzzer::Options getVectorFuzzerOptions() {

ExpressionFuzzer::Options getExpressionFuzzerOptions(
const std::unordered_set<std::string>& skipFunctions,
const std::unordered_map<std::string, std::shared_ptr<ExprTransformer>>&
exprTransformers,
std::shared_ptr<exec::test::ReferenceQueryRunner> referenceQueryRunner) {
ExpressionFuzzer::Options opts;
opts.maxLevelOfNesting = FLAGS_velox_fuzzer_max_level_of_nesting;
Expand All @@ -185,11 +187,14 @@ ExpressionFuzzer::Options getExpressionFuzzerOptions(
opts.useOnlyFunctions = FLAGS_only;
opts.skipFunctions = skipFunctions;
opts.referenceQueryRunner = referenceQueryRunner;
opts.exprTransformers = exprTransformers;
return opts;
}

ExpressionFuzzerVerifier::Options getExpressionFuzzerVerifierOptions(
const std::unordered_set<std::string>& skipFunctions,
const std::unordered_map<std::string, std::shared_ptr<ExprTransformer>>&
exprTransformers,
const std::unordered_map<std::string, std::string>& queryConfigs,
std::shared_ptr<exec::test::ReferenceQueryRunner> referenceQueryRunner) {
ExpressionFuzzerVerifier::Options opts;
Expand All @@ -204,8 +209,8 @@ ExpressionFuzzerVerifier::Options getExpressionFuzzerVerifierOptions(
opts.lazyVectorGenerationRatio = FLAGS_lazy_vector_generation_ratio;
opts.maxExpressionTreesPerStep = FLAGS_max_expression_trees_per_step;
opts.vectorFuzzerOptions = getVectorFuzzerOptions();
opts.expressionFuzzerOptions =
getExpressionFuzzerOptions(skipFunctions, referenceQueryRunner);
opts.expressionFuzzerOptions = getExpressionFuzzerOptions(
skipFunctions, exprTransformers, referenceQueryRunner);
opts.queryConfigs = queryConfigs;
return opts;
}
Expand All @@ -216,19 +221,28 @@ ExpressionFuzzerVerifier::Options getExpressionFuzzerVerifierOptions(
int FuzzerRunner::run(
size_t seed,
const std::unordered_set<std::string>& skipFunctions,
const std::unordered_map<std::string, std::shared_ptr<ExprTransformer>>&
exprTransformers,
const std::unordered_map<std::string, std::string>& queryConfigs,
const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>&
argGenerators,
std::shared_ptr<exec::test::ReferenceQueryRunner> referenceQueryRunner) {
runFromGtest(
seed, skipFunctions, queryConfigs, argGenerators, referenceQueryRunner);
seed,
skipFunctions,
exprTransformers,
queryConfigs,
argGenerators,
referenceQueryRunner);
return RUN_ALL_TESTS();
}

// static
void FuzzerRunner::runFromGtest(
size_t seed,
const std::unordered_set<std::string>& skipFunctions,
const std::unordered_map<std::string, std::shared_ptr<ExprTransformer>>&
exprTransformers,
const std::unordered_map<std::string, std::string>& queryConfigs,
const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>&
argGenerators,
Expand All @@ -241,7 +255,7 @@ void FuzzerRunner::runFromGtest(
signatures,
seed,
getExpressionFuzzerVerifierOptions(
skipFunctions, queryConfigs, referenceQueryRunner),
skipFunctions, exprTransformers, queryConfigs, referenceQueryRunner),
argGenerators)
.go();
}
Expand Down
7 changes: 7 additions & 0 deletions velox/expression/fuzzer/FuzzerRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,23 @@
#include <unordered_set>
#include <vector>

#include "velox/exec/fuzzer/ExprTransformer.h"
#include "velox/exec/fuzzer/ReferenceQueryRunner.h"
#include "velox/expression/fuzzer/ExpressionFuzzerVerifier.h"
#include "velox/functions/FunctionRegistry.h"

namespace facebook::velox::fuzzer {

using facebook::velox::exec::test::ExprTransformer;

/// FuzzerRunner leverages ExpressionFuzzerVerifier to create a gtest unit test.
class FuzzerRunner {
public:
static int run(
size_t seed,
const std::unordered_set<std::string>& skipFunctions,
const std::unordered_map<std::string, std::shared_ptr<ExprTransformer>>&
exprTransformers,
const std::unordered_map<std::string, std::string>& queryConfigs,
const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>&
argGenerators,
Expand All @@ -42,6 +47,8 @@ class FuzzerRunner {
static void runFromGtest(
size_t seed,
const std::unordered_set<std::string>& skipFunctions,
const std::unordered_map<std::string, std::shared_ptr<ExprTransformer>>&
exprTransformers,
const std::unordered_map<std::string, std::string>& queryConfigs,
const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>&
argGenerators,
Expand Down
1 change: 1 addition & 0 deletions velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ int main(int argc, char** argv) {
return FuzzerRunner::run(
FLAGS_seed,
skipFunctions,
{{}},
queryConfigs,
argGenerators,
referenceQueryRunner);
Expand Down
69 changes: 69 additions & 0 deletions velox/functions/prestosql/fuzzer/SortArrayTransformer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include "velox/core/Expressions.h"
#include "velox/exec/fuzzer/ExprTransformer.h"
#include "velox/type/Type.h"

namespace facebook::velox::exec::test {

using facebook::velox::TypePtr;
using facebook::velox::core::TypedExprPtr;
using facebook::velox::exec::test::ExprTransformer;

/// ExprTransformer that makes the element order of an array-typed expression
/// deterministic by sorting it. Intended for functions whose result order is
/// unspecified, so that results can be compared without spurious mismatches.
class SortArrayTransformer : public ExprTransformer {
 public:
  ~SortArrayTransformer() override = default;

  /// Returns array_sort('expr') with the same type as 'expr'. When the type of
  /// 'expr' contains a map at any nesting level, array_sort cannot be applied,
  /// so a null constant of that type is returned in place of the expression.
  TypedExprPtr transform(TypedExprPtr expr) const override {
    // Copy the type up front: 'expr' is moved from below.
    const TypePtr resultType = expr->type();

    if (!containsMap(resultType)) {
      std::vector<TypedExprPtr> sortArgs;
      sortArgs.push_back(std::move(expr));
      return std::make_shared<core::CallTypedExpr>(
          resultType, std::move(sortArgs), "array_sort");
    }

    // TODO: support map type by using array_sort with a lambda that casts
    // array elements to JSON before comparison.
    return std::make_shared<core::ConstantTypedExpr>(
        resultType, facebook::velox::variant::null(resultType->kind()));
  }

  /// The array_sort call wrapped around the expression adds one level.
  int32_t extraLevelOfNesting() const override {
    return 1;
  }

 private:
  // Returns true if 'type' is a map, or is an array or row that contains a
  // map somewhere among its element or field types (checked recursively).
  bool containsMap(const TypePtr& type) const {
    if (type->isMap()) {
      return true;
    }
    if (type->isArray()) {
      return containsMap(type->asArray().elementType());
    }
    if (!type->isRow()) {
      return false;
    }

    const auto& fieldTypes = type->asRow().children();
    for (size_t i = 0; i < fieldTypes.size(); ++i) {
      if (containsMap(fieldTypes[i])) {
        return true;
      }
    }
    return false;
  }
};

} // namespace facebook::velox::exec::test

0 comments on commit e67f11b

Please sign in to comment.