Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add support for UUID comparison functions #10791

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 16 additions & 13 deletions velox/exec/fuzzer/PrestoQueryRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "velox/functions/prestosql/types/IPAddressType.h"
#include "velox/functions/prestosql/types/IPPrefixType.h"
#include "velox/functions/prestosql/types/JsonType.h"
#include "velox/functions/prestosql/types/UuidType.h"
#include "velox/serializers/PrestoSerializer.h"
#include "velox/type/parser/TypeParser.h"

Expand Down Expand Up @@ -426,18 +427,19 @@ bool PrestoQueryRunner::isConstantExprSupported(
const core::TypedExprPtr& expr) {
if (std::dynamic_pointer_cast<const core::ConstantTypedExpr>(expr)) {
// TODO: support constant literals of these types. Complex-typed constant
// literals require support of converting them to SQL. Json, Ipaddress, and
// Ipprefix can be enabled after we're able to generate valid input values,
// because when these types are used as the type of a constant literal in
// SQL, Presto implicitly invoke json_parse(), cast(x as Ipaddress), and
// cast(x as Ipprefix) on it, which makes the behavior of Presto different
// from Velox. Timestamp constant literals require further investigation to
// ensure Presto uses the same timezone as Velox. Interval type cannot be
// used as the type of constant literals in Presto SQL.
// literals require support of converting them to SQL. Json, Ipaddress,
// Ipprefix, and Uuid can be enabled after we're able to generate valid
// input values, because when these types are used as the type of a constant
// literal in SQL, Presto implicitly invokes json_parse(),
// cast(x as Ipaddress), cast(x as Ipprefix), and cast(x as Uuid) on it,
// which makes the behavior of Presto different from Velox. Timestamp
// constant literals require further investigation to ensure Presto uses the
// same timezone as Velox. Interval type cannot be used as the type of
// constant literals in Presto SQL.
auto& type = expr->type();
return type->isPrimitiveType() && !type->isTimestamp() &&
!isJsonType(type) && !type->isIntervalDayTime() &&
!isIPAddressType(type) && !isIPPrefixType(type);
!isIPAddressType(type) && !isIPPrefixType(type) && !isUuidType(type);
}
return true;
}
Expand All @@ -448,16 +450,17 @@ bool PrestoQueryRunner::isSupported(const exec::FunctionSignature& signature) {
// cast-to or constant literals. Hyperloglog can only be cast from varbinary
// and cannot be used as the type of constant literals. Interval year to month
// can only be cast from NULL and cannot be used as the type of constant
// literals. Json, Ipaddress, and Ipprefix require special handling, because
// Presto requires literals of these types to be valid, and doesn't allow
// creating HIVE columns of these types.
// literals. Json, Ipaddress, Ipprefix, and UUID require special handling,
// because Presto requires literals of these types to be valid, and doesn't
// allow creating HIVE columns of these types.
return !(
usesTypeName(signature, "interval year to month") ||
usesTypeName(signature, "hugeint") ||
usesTypeName(signature, "hyperloglog") ||
usesInputTypeName(signature, "json") ||
usesInputTypeName(signature, "ipaddress") ||
usesInputTypeName(signature, "ipprefix"));
usesInputTypeName(signature, "ipprefix") ||
usesInputTypeName(signature, "uuid"));
}

std::optional<std::string> PrestoQueryRunner::toSql(
Expand Down
23 changes: 23 additions & 0 deletions velox/functions/prestosql/UuidFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,29 @@

#include "velox/functions/Macros.h"
#include "velox/functions/Registerer.h"
#include "velox/functions/prestosql/Comparisons.h"
#include "velox/functions/prestosql/types/UuidType.h"

namespace facebook::velox::functions {

// Defines a function struct named `<Name>Uuid` whose call() converts both UUID
// arguments to uint128_t and stores in `result` the outcome of applying the
// binary comparison operator `op` to the converted values.
#define VELOX_GEN_BINARY_EXPR_UUID(Name, op)                      \
  template <typename T>                                           \
  struct Name##Uuid {                                             \
    VELOX_DEFINE_FUNCTION_TYPES(T);                               \
                                                                  \
    FOLLY_ALWAYS_INLINE void call(                                \
        bool& result,                                             \
        const arg_type<Uuid>& lhs,                                \
        const arg_type<Uuid>& rhs) {                              \
      const auto lhsBits = static_cast<uint128_t>(lhs);           \
      const auto rhsBits = static_cast<uint128_t>(rhs);           \
      result = lhsBits op rhsBits;                                \
    }                                                             \
  };

// One struct per ordering operator: LtFunctionUuid, GtFunctionUuid,
// LteFunctionUuid and GteFunctionUuid.
VELOX_GEN_BINARY_EXPR_UUID(LtFunction, <);
VELOX_GEN_BINARY_EXPR_UUID(GtFunction, >);
VELOX_GEN_BINARY_EXPR_UUID(LteFunction, <=);
VELOX_GEN_BINARY_EXPR_UUID(GteFunction, >=);

// The generator is an implementation detail of this header; do not leak it.
#undef VELOX_GEN_BINARY_EXPR_UUID

template <typename T>
struct UuidFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);
Expand All @@ -42,6 +61,10 @@ struct UuidFunction {
// Registers the UUID type and the UUID functions defined in this header.
// Every function name is formed by prepending `prefix` to a fixed suffix:
// "uuid" for UuidFunction, and "lt", "gt", "lte", "gte" for the comparison
// structs, which are registered with a bool result and two Uuid arguments.
inline void registerUuidFunctions(const std::string& prefix) {
// Runs before the function registrations below; presumably the Uuid type must
// be known before signatures that mention it are registered — TODO confirm.
registerUuidType();
registerFunction<UuidFunction, Uuid>({prefix + "uuid"});
// Ordering comparisons between two UUID values.
registerFunction<LtFunctionUuid, bool, Uuid, Uuid>({prefix + "lt"});
registerFunction<GtFunctionUuid, bool, Uuid, Uuid>({prefix + "gt"});
registerFunction<LteFunctionUuid, bool, Uuid, Uuid>({prefix + "lte"});
registerFunction<GteFunctionUuid, bool, Uuid, Uuid>({prefix + "gte"});
}

} // namespace facebook::velox::functions
114 changes: 114 additions & 0 deletions velox/functions/prestosql/tests/UuidFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,5 +108,119 @@ TEST_F(UuidFunctionsTest, unsupportedCast) {
evaluate("cast(123 as uuid())", input), "Cannot cast BIGINT to UUID.");
}

TEST_F(UuidFunctionsTest, comparisons) {
const auto uuidEval = [&](const std::optional<std::string>& lhs,
const std::string& operation,
const std::optional<std::string>& rhs) {
return evaluateOnce<bool>(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you try the version of evaluateOnce at https://github.com/facebookincubator/velox/blob/main/velox/functions/prestosql/tests/utils/FunctionBaseTest.h#L241, which can override the logical type? It might reduce the casting in your expression.

So the call would be something like
evaluateOnce<bool>(fmt::format("c0 {} c1", operation), {UUID(), UUID()}, lhs, rhs)

Of course, that assumes you are not relying on the cast to reshuffle the bytes so that the uint128_t comparisons hold.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That does work, but then I would need to construct the UUIDs with boost and then convert to an int128_t to pass to the eval. I think the end result is less clear, and this way also tests the UUID casting.

fmt::format("cast(c0 as uuid) {} cast(c1 as uuid)", operation),
lhs,
rhs);
};

ASSERT_EQ(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just confirming that you got the same results with Presto Java.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I've copied these values to the Presto Java UUID tests to run and get the same results.

true,
uuidEval(
"33355449-2c7d-43d7-967a-f53cd23215ad",
"<",
"ffffffff-ffff-ffff-ffff-ffffffffffff"));
ASSERT_EQ(
aditi-pandit marked this conversation as resolved.
Show resolved Hide resolved
false,
uuidEval(
"33355449-2c7d-43d7-967a-f53cd23215ad",
"<",
"00000000-0000-0000-0000-000000000000"));
ASSERT_EQ(
true,
uuidEval(
"f768f36d-4f09-4da7-a298-3564d8f3c986",
">",
"00000000-0000-0000-0000-000000000000"));
ASSERT_EQ(
false,
uuidEval(
"f768f36d-4f09-4da7-a298-3564d8f3c986",
">",
"ffffffff-ffff-ffff-ffff-ffffffffffff"));

ASSERT_EQ(
true,
uuidEval(
"33355449-2c7d-43d7-967a-f53cd23215ad",
"<=",
"33355449-2c7d-43d7-967a-f53cd23215ad"));
ASSERT_EQ(
true,
uuidEval(
"33355449-2c7d-43d7-967a-f53cd23215ad",
"<=",
"ffffffff-ffff-ffff-ffff-ffffffffffff"));
ASSERT_EQ(
true,
uuidEval(
"33355449-2c7d-43d7-967a-f53cd23215ad",
">=",
"33355449-2c7d-43d7-967a-f53cd23215ad"));
ASSERT_EQ(
true,
uuidEval(
"ffffffff-ffff-ffff-ffff-ffffffffffff",
">=",
"33355449-2c7d-43d7-967a-f53cd23215ad"));

ASSERT_EQ(
true,
uuidEval(
"f768f36d-4f09-4da7-a298-3564d8f3c986",
"==",
"f768f36d-4f09-4da7-a298-3564d8f3c986"));
ASSERT_EQ(
true,
uuidEval(
"eed9f812-4b0c-472f-8a10-4ae7bff79a47",
"!=",
"f768f36d-4f09-4da7-a298-3564d8f3c986"));

ASSERT_EQ(
true,
uuidEval(
"11000000-0000-0022-0000-000000000000",
"<",
"22000000-0000-0011-0000-000000000000"));
ASSERT_EQ(
true,
uuidEval(
"00000000-0000-0000-2200-000000000011",
">",
"00000000-0000-0000-1100-000000000022"));
ASSERT_EQ(
false,
uuidEval(
"00000000-0000-0000-0000-000000000011",
">",
"22000000-0000-0000-0000-000000000000"));
ASSERT_EQ(
false,
uuidEval(
"11000000-0000-0000-0000-000000000000",
"<",
"00000000-0000-0000-0000-000000000022"));

std::string lhs = "12342345-3456-4567-5678-678978908901";
std::string rhs = "23451234-4567-3456-6789-567889017890";
ASSERT_EQ(true, uuidEval(lhs, "<", rhs));

for (vector_size_t i = 0; i < lhs.size(); i++) {
if (lhs[i] == '-') {
continue;
}
lhs[i] = '0';
rhs[i] = '0';
bool expected = boost::lexical_cast<boost::uuids::uuid>(lhs) <
boost::lexical_cast<boost::uuids::uuid>(rhs);
ASSERT_EQ(expected, uuidEval(lhs, "<", rhs));
}
}

} // namespace
} // namespace facebook::velox::functions::prestosql
Loading