Skip to content

Commit

Permalink
Add support for DECIMAL types to Expression Fuzzer
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-mo committed Sep 5, 2024
1 parent 94dcf02 commit 70cf156
Show file tree
Hide file tree
Showing 17 changed files with 329 additions and 118 deletions.
4 changes: 2 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -206,11 +206,11 @@ with a benchmark.
```
# Test the new function in isolation. Use --only flag to restrict the set of functions
# and run for 60 seconds or longer.
velox_expression_fuzzer_test --only <my-new-function-name> --duration_sec 60 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse
velox_expression_fuzzer_test --only <my-new-function-name> --duration_sec 60 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --velox_fuzzer_enable_decimal_type --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse
# Test the new function in combination with other functions. Do not restrict the set
# of functions and run for 10 minutes (600 seconds) or longer.
velox_expression_fuzzer_test --duration_sec 600 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse
velox_expression_fuzzer_test --duration_sec 600 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --velox_fuzzer_enable_decimal_type --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse
```

Here are example PRs:
Expand Down
2 changes: 2 additions & 0 deletions velox/docs/develop/testing/fuzzer.rst
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,8 @@ Below are arguments that toggle certain fuzzer features in Expression Fuzzer:

* ``--velox_fuzzer_enable_complex_types``: Enable testing of function signatures with complex argument or return types. Default is false.

* ``--velox_fuzzer_enable_decimal_type``: Enable testing of function signatures with decimal argument or return type. Default is false.

* ``--lazy_vector_generation_ratio``: Specifies the probability with which columns in the input row vector will be selected to be wrapped in lazy encoding (expressed as double from 0 to 1). Default is 0.0.

* ``--velox_fuzzer_enable_column_reuse``: Enable generation of expressions where one input column can be used by multiple subexpressions. Default is false.
Expand Down
20 changes: 0 additions & 20 deletions velox/expression/ReverseSignatureBinder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,7 @@

namespace facebook::velox::exec {

bool ReverseSignatureBinder::hasConstrainedIntegerVariable(
const TypeSignature& type) const {
if (type.parameters().empty()) {
auto it = variables().find(type.baseName());
return it != variables().end() && it->second.isIntegerParameter() &&
it->second.constraint() != "";
}

const auto& parameters = type.parameters();
for (const auto& parameter : parameters) {
if (hasConstrainedIntegerVariable(parameter)) {
return true;
}
}
return false;
}

bool ReverseSignatureBinder::tryBind() {
if (hasConstrainedIntegerVariable(signature_.returnType())) {
return false;
}
tryBindSucceeded_ =
SignatureBinderBase::tryBind(signature_.returnType(), returnType_);
return tryBindSucceeded_;
Expand Down
4 changes: 0 additions & 4 deletions velox/expression/ReverseSignatureBinder.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,6 @@ class ReverseSignatureBinder : private SignatureBinderBase {
}

private:
// Return whether there is a constraint on an integer variable in type
// signature.
bool hasConstrainedIntegerVariable(const TypeSignature& type) const;

const TypePtr returnType_;

// True if 'tryBind' has been called and succeeded. False otherwise.
Expand Down
174 changes: 114 additions & 60 deletions velox/expression/fuzzer/ArgumentTypeFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,70 +58,43 @@ bool isPositiveInteger(const std::string& str) {

void ArgumentTypeFuzzer::determineUnboundedIntegerVariables(
const exec::TypeSignature& type) {
if (!isDecimalBaseName(type.baseName())) {
std::vector<exec::TypeSignature> decimalTypes;
findDecimalTypes(type, decimalTypes);
if (decimalTypes.empty()) {
return;
}
for (const auto& decimalType : decimalTypes) {
auto [p, s] = tryBindFixedPrecisionScale(decimalType);

VELOX_CHECK_EQ(2, type.parameters().size())

const auto& precision = type.parameters()[0].baseName();
const auto& scale = type.parameters()[1].baseName();

// Bind 'name' variable, if not already bound, using 'constant' constraint
// ('name'='123'). Return bound value if 'name' is already bound or was
// successfully bound to a constant value. Return std::nullopt otherwise.
auto tryFixedBinding = [&](const auto& name) -> std::optional<int> {
auto it = variables().find(name);
if (it == variables().end()) {
VELOX_CHECK(
isPositiveInteger(name),
"Precision and scale of a decimal type must refer to a variable "
"or specify a position integer constant: {}",
name)
return std::stoi(name);
}

if (integerBindings_.count(name) > 0) {
return integerBindings_[name];
if (p.has_value() && s.has_value()) {
return;
}

if (isPositiveInteger(it->second.constraint())) {
const auto value = std::stoi(it->second.constraint());
integerBindings_[name] = value;
return value;
}

return std::nullopt;
};

std::optional<int> p = tryFixedBinding(precision);
std::optional<int> s = tryFixedBinding(scale);
const auto& precision = decimalType.parameters()[0].baseName();
const auto& scale = decimalType.parameters()[1].baseName();

if (p.has_value() && s.has_value()) {
return;
}
if (s.has_value()) {
p = std::max<int>(ShortDecimalType::kMinPrecision, s.value());
if (p < LongDecimalType::kMaxPrecision) {
p = p.value() + rand32(0, LongDecimalType::kMaxPrecision - p.value());
}

if (s.has_value()) {
p = std::max<int>(ShortDecimalType::kMinPrecision, s.value());
if (p < LongDecimalType::kMaxPrecision) {
p = p.value() + rand32(0, LongDecimalType::kMaxPrecision - p.value());
integerBindings_[precision] = p.value();
return;
}

integerBindings_[precision] = p.value();
return;
}
if (p.has_value()) {
s = rand32(0, p.value());
integerBindings_[scale] = s.value();
return;
}

if (p.has_value()) {
p = rand32(1, LongDecimalType::kMaxPrecision);
s = rand32(0, p.value());

integerBindings_[precision] = p.value();
integerBindings_[scale] = s.value();
return;
}

p = rand32(1, LongDecimalType::kMaxPrecision);
s = rand32(0, p.value());

integerBindings_[precision] = p.value();
integerBindings_[scale] = s.value();
}

void ArgumentTypeFuzzer::determineUnboundedTypeVariables() {
Expand Down Expand Up @@ -154,24 +127,55 @@ TypePtr ArgumentTypeFuzzer::randOrderableType() {
}

bool ArgumentTypeFuzzer::fuzzArgumentTypes(uint32_t maxVariadicArgs) {
const auto& formalArgs = signature_.argumentTypes();
auto formalArgsCnt = formalArgs.size();

if (returnType_) {
if (returnType_ == nullptr) {
for (const auto& [name, _] : signature_.variables()) {
bindings_.insert({name, nullptr});
}
} else {
exec::ReverseSignatureBinder binder{signature_, returnType_};
if (!binder.tryBind()) {
return false;
}

auto types = signature_.argumentTypes();
types.emplace_back(signature_.returnType());
for (const auto& type : types) {
std::vector<exec::TypeSignature> decimalTypes;
findDecimalTypes(type, decimalTypes);
if (decimalTypes.empty()) {
continue;
}

// Verify if the precision and scale variables of argument types and
// return type can be bound to constant values. If not and extra
// constraint is provided, argument types cannot be generated without
// following these constraints.
for (const auto& decimalType : decimalTypes) {
const auto [p, s] = tryBindFixedPrecisionScale(decimalType);

const auto& precision = decimalType.parameters()[0].baseName();
const auto& scale = decimalType.parameters()[1].baseName();

const auto hasConstraint = [&](const std::string& name) {
return !variables().at(name).constraint().empty();
};

if ((!p.has_value() && hasConstraint(precision)) ||
(!s.has_value() && hasConstraint(scale))) {
return false;
}
}
}

bindings_ = binder.bindings();
integerBindings_ = binder.integerBindings();
} else {
for (const auto& [name, _] : signature_.variables()) {
bindings_.insert({name, nullptr});
}
}

const auto& formalArgs = signature_.argumentTypes();
auto formalArgsCnt = formalArgs.size();

determineUnboundedTypeVariables();
for (const auto& argType : signature_.argumentTypes()) {
for (const auto& argType : formalArgs) {
determineUnboundedIntegerVariables(argType);
}
for (auto i = 0; i < formalArgsCnt; i++) {
Expand Down Expand Up @@ -226,4 +230,54 @@ int32_t ArgumentTypeFuzzer::rand32(int32_t min, int32_t max) {
return boost::random::uniform_int_distribution<uint32_t>(min, max)(rng_);
}

std::optional<int> ArgumentTypeFuzzer::tryFixedBinding(
const std::string& name) {
auto it = variables().find(name);
if (it == variables().end()) {
VELOX_CHECK(
isPositiveInteger(name),
"Precision and scale of a decimal type must refer to a variable "
"or specify a positive integer constant: {}",
name)
return std::stoi(name);
}

if (integerBindings_.count(name) > 0) {
return integerBindings_[name];
}

if (isPositiveInteger(it->second.constraint())) {
const auto value = std::stoi(it->second.constraint());
integerBindings_[name] = value;
return value;
}

return std::nullopt;
}

std::pair<std::optional<int>, std::optional<int>>
ArgumentTypeFuzzer::tryBindFixedPrecisionScale(
const exec::TypeSignature& type) {
VELOX_CHECK(isDecimalBaseName(type.baseName()));
VELOX_CHECK_EQ(2, type.parameters().size())

const auto& precisionName = type.parameters()[0].baseName();
const auto& scaleName = type.parameters()[1].baseName();

std::optional<int> p = tryFixedBinding(precisionName);
std::optional<int> s = tryFixedBinding(scaleName);
return {p, s};
}

void ArgumentTypeFuzzer::findDecimalTypes(
const exec::TypeSignature& type,
std::vector<exec::TypeSignature>& decimalTypes) const {
if (isDecimalBaseName(type.baseName())) {
decimalTypes.push_back(type);
}
for (const auto& param : type.parameters()) {
findDecimalTypes(param, decimalTypes);
}
}

} // namespace facebook::velox::fuzzer
16 changes: 16 additions & 0 deletions velox/expression/fuzzer/ArgumentTypeFuzzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,22 @@ class ArgumentTypeFuzzer {
/// Generates an orderable random type, including structs, and arrays.
TypePtr randOrderableType();

// Bind 'name' variable, if not already bound, using 'constant' constraint
// ('name'='123'). Return bound value if 'name' is already bound or was
// successfully bound to a constant value. Return std::nullopt otherwise.
std::optional<int> tryFixedBinding(const std::string& name);

// Bind the precision and scale variables in a decimal type signature to
// constant values. Return std::nullopt if the variable cannot be bound to a
// constant value.
std::pair<std::optional<int>, std::optional<int>> tryBindFixedPrecisionScale(
const exec::TypeSignature& type);

// Find all the nested decimal type signatures recursively.
void findDecimalTypes(
const exec::TypeSignature& type,
std::vector<exec::TypeSignature>& decimalTypes) const;

const exec::FunctionSignature& signature_;

TypePtr returnType_;
Expand Down
Loading

0 comments on commit 70cf156

Please sign in to comment.