From f0b0659f7a8c307264fbd3f83aba7da80bc1f78d Mon Sep 17 00:00:00 2001 From: Wei He Date: Wed, 11 Dec 2024 19:06:41 -0800 Subject: [PATCH 1/2] fix(fuzzer): Fix SQL translation of between() in PrestoQueryRunner (#11819) Summary: The between() function requires a special syntax in SQL. This diff fixes PrestoQueryRunner to take care of between() when translating an expression to SQL. Reviewed By: yuandagits Differential Revision: D67058882 --- velox/exec/fuzzer/ToSQLUtil.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/velox/exec/fuzzer/ToSQLUtil.cpp b/velox/exec/fuzzer/ToSQLUtil.cpp index e85d98a9631c..e2f601475836 100644 --- a/velox/exec/fuzzer/ToSQLUtil.cpp +++ b/velox/exec/fuzzer/ToSQLUtil.cpp @@ -188,6 +188,14 @@ std::string toCallSql(const core::CallTypedExprPtr& call) { sql << "ARRAY["; toCallInputsSql(call->inputs(), sql); sql << "]"; + } else if (call->name() == "between") { + const auto& inputs = call->inputs(); + VELOX_CHECK_EQ(inputs.size(), 3); + toCallInputsSql({inputs[0]}, sql); + sql << " between "; + toCallInputsSql({inputs[1]}, sql); + sql << " and "; + toCallInputsSql({inputs[2]}, sql); } else { // Regular function call syntax. sql << call->name() << "("; From d57f98a9bd2fd3cbf36ccd78544176226fc02be7 Mon Sep 17 00:00:00 2001 From: Wei He Date: Wed, 11 Dec 2024 19:06:41 -0800 Subject: [PATCH 2/2] fix(fuzzer): Mark IPADDRESS and IPPREFIX types as unsuppported input types in PrestoQueryRunner (#11820) Summary: Mark IPADDRES and IPPREFIX types as unsuppported input types in PrestoQueryRunner because Presto doesn't allow creating HIVE columns of these types and requires constant literals of these types to be valid strings. Reviewed By: kevinwilfong Differential Revision: D67060600 --- velox/exec/fuzzer/FuzzerUtil.cpp | 14 +++++++++---- velox/exec/fuzzer/FuzzerUtil.h | 6 ++++++ velox/exec/fuzzer/PrestoQueryRunner.cpp | 26 +++++++++++++++---------- 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/velox/exec/fuzzer/FuzzerUtil.cpp b/velox/exec/fuzzer/FuzzerUtil.cpp index 036957964cc8..b5fc55d92cb0 100644 --- a/velox/exec/fuzzer/FuzzerUtil.cpp +++ b/velox/exec/fuzzer/FuzzerUtil.cpp @@ -260,12 +260,9 @@ bool containTypeName( return false; } -bool usesTypeName( +bool usesInputTypeName( const exec::FunctionSignature& signature, const std::string& typeName) { - if (containTypeName(signature.returnType(), typeName)) { - return true; - } for (const auto& argument : signature.argumentTypes()) { if (containTypeName(argument, typeName)) { return true; @@ -274,6 +271,15 @@ bool usesTypeName( return false; } +bool usesTypeName( + const exec::FunctionSignature& signature, + const std::string& typeName) { + if (containTypeName(signature.returnType(), typeName)) { + return true; + } + return usesInputTypeName(signature, typeName); +} + // If 'type' is a RowType or contains RowTypes with empty field names, adds // default names to these fields in the RowTypes. TypePtr sanitize(const TypePtr& type) { diff --git a/velox/exec/fuzzer/FuzzerUtil.h b/velox/exec/fuzzer/FuzzerUtil.h index 51314d60a0ee..ca289a04458e 100644 --- a/velox/exec/fuzzer/FuzzerUtil.h +++ b/velox/exec/fuzzer/FuzzerUtil.h @@ -93,6 +93,12 @@ RowTypePtr concat(const RowTypePtr& a, const RowTypePtr& b); /// TODO Investigate mismatches reported when comparing Varbinary. bool containsUnsupportedTypes(const TypePtr& type); +/// Determines whether the signature has an argument that contains typeName. +/// typeName should be in lower case. +bool usesInputTypeName( + const exec::FunctionSignature& signature, + const std::string& typeName); + /// Determines whether the signature has an argument or return type that /// contains typeName. typeName should be in lower case. bool usesTypeName( diff --git a/velox/exec/fuzzer/PrestoQueryRunner.cpp b/velox/exec/fuzzer/PrestoQueryRunner.cpp index 3ab8689030da..37f31869541d 100644 --- a/velox/exec/fuzzer/PrestoQueryRunner.cpp +++ b/velox/exec/fuzzer/PrestoQueryRunner.cpp @@ -30,6 +30,8 @@ #include "velox/exec/fuzzer/FuzzerUtil.h" #include "velox/exec/fuzzer/ToSQLUtil.h" #include "velox/exec/tests/utils/QueryAssertions.h" +#include "velox/functions/prestosql/types/IPAddressType.h" +#include "velox/functions/prestosql/types/IPPrefixType.h" #include "velox/functions/prestosql/types/JsonType.h" #include "velox/serializers/PrestoSerializer.h" #include "velox/type/parser/TypeParser.h" @@ -424,16 +426,18 @@ bool PrestoQueryRunner::isConstantExprSupported( const core::TypedExprPtr& expr) { if (std::dynamic_pointer_cast(expr)) { // TODO: support constant literals of these types. Complex-typed constant - // literals require support of converting them to SQL. Json can be enabled - // after we're able to generate valid Json strings, because when Json is - // used as the type of constant literals in SQL, Presto implicitly invoke - // json_parse() on it, which makes the behavior of Presto different from - // Velox. Timestamp constant literals require further investigation to + // literals require support of converting them to SQL. Json, Ipaddress, and + // Ipprefix can be enabled after we're able to generate valid input values, + // because when these types are used as the type of a constant literal in + // SQL, Presto implicitly invoke json_parse(), cast(x as Ipaddress), and + // cast(x as Ipprefix) on it, which makes the behavior of Presto different + // from Velox. Timestamp constant literals require further investigation to // ensure Presto uses the same timezone as Velox. Interval type cannot be // used as the type of constant literals in Presto SQL. auto& type = expr->type(); return type->isPrimitiveType() && !type->isTimestamp() && - !isJsonType(type) && !type->isIntervalDayTime(); + !isJsonType(type) && !type->isIntervalDayTime() && + !isIPAddressType(type) && !isIPPrefixType(type); } return true; } @@ -444,14 +448,16 @@ bool PrestoQueryRunner::isSupported(const exec::FunctionSignature& signature) { // cast-to or constant literals. Hyperloglog can only be casted from varbinary // and cannot be used as the type of constant literals. Interval year to month // can only be casted from NULL and cannot be used as the type of constant - // literals. Json requires special handling, because Presto requires Json - // literals to be valid Json strings, and doesn't allow creation of Json-typed - // HIVE columns. + // literals. Json, Ipaddress, and Ipprefix require special handling, because + // Presto requires literals of these types to be valid, and doesn't allow + // creating HIVE columns of these types. return !( usesTypeName(signature, "interval year to month") || usesTypeName(signature, "hugeint") || usesTypeName(signature, "hyperloglog") || - usesTypeName(signature, "json")); + usesInputTypeName(signature, "json") || + usesInputTypeName(signature, "ipaddress") || + usesInputTypeName(signature, "ipprefix")); } std::optional PrestoQueryRunner::toSql(