Skip to content

Commit

Permalink
Add support for VARBINARY input to from_base64 Presto function (facebookincubator#10325)
Browse files Browse the repository at this point in the history

Summary:
Pull Request resolved: facebookincubator#10325

Presto supports both VARCHAR and VARBINARY inputs to the from_base64 function.

```
presto:di> show functions like 'from_base64';
  Function   | Return Type | Argument Types | Function Type | Deterministic |            Description            | Variable Arity | Built In | Temporary >
-------------+-------------+----------------+---------------+---------------+-----------------------------------+----------------+----------+----------->
 from_base64 | varbinary   | varbinary      | scalar        | true          | decode base64 encoded binary data | false          | true     | false     >
 from_base64 | varbinary   | varchar(x)     | scalar        | true          | decode base64 encoded binary data | false          | true     | false     >
(2 rows)
```

Reviewed By: amitkdutta

Differential Revision: D59070858

fbshipit-source-id: d160b7881fcee0857c4aa53042641563844a5c42
  • Loading branch information
mbasmanova authored and facebook-github-bot committed Jun 26, 2024
1 parent 7add4bf commit 31800f5
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 17 deletions.
26 changes: 12 additions & 14 deletions velox/functions/prestosql/BinaryFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,21 +282,19 @@ struct ToBase64Function {
}
};

// Simple function that decodes base64-encoded input into a Varbinary result.
//
// NOTE(review): this span is a rendered diff hunk without +/- markers, so the
// pre-commit and post-commit versions of the struct appear interleaved and the
// text is not compilable as shown. The "pre-commit"/"post-commit" labels below
// are inferred from the duplicated declarations and the hunk's
// "12 additions & 14 deletions" summary -- confirm against the real file.
//
// Pre-commit template header (presumably removed by this commit).
template <typename T>
// Post-commit template header (presumably added); TExec is the parameter
// handed to VELOX_DEFINE_FUNCTION_TYPES further down.
template <typename TExec>
struct FromBase64Function {
// --- Pre-commit body (presumably removed) ---
// call() accepted only arg_type<Varchar> input and reported any
// encoding::Base64Exception through VELOX_USER_FAIL(e.what()).
VELOX_DEFINE_FUNCTION_TYPES(T);
FOLLY_ALWAYS_INLINE void call(
out_type<Varbinary>& result,
const arg_type<Varchar>& input) {
try {
auto inputSize = input.size();
result.resize(
encoding::Base64::calculateDecodedSize(input.data(), inputSize));
encoding::Base64::decode(
input.data(), inputSize, result.data(), result.size());
} catch (const encoding::Base64Exception& e) {
VELOX_USER_FAIL(e.what());
}
// --- Post-commit body (presumably added) ---
// There is no try/catch in this version; how decode errors surface is not
// visible in this hunk.
VELOX_DEFINE_FUNCTION_TYPES(TExec);

// T can be either arg_type<Varchar> or arg_type<Varbinary>. These are the
// same, but hard-coding one of them might be confusing, so the input type is
// left as a template parameter of call().
//
// Sizes `result` with Base64::calculateDecodedSize(input) and then decodes
// the input bytes directly into result's buffer.
template <typename T>
FOLLY_ALWAYS_INLINE void call(out_type<Varbinary>& result, const T& input) {
auto inputSize = input.size();
result.resize(
encoding::Base64::calculateDecodedSize(input.data(), inputSize));
encoding::Base64::decode(
input.data(), inputSize, result.data(), result.size());
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,12 @@ void registerSimpleFunctions(const std::string& prefix) {
registerFunction<FromHexFunction, Varbinary, Varchar>({prefix + "from_hex"});
registerFunction<ToBase64Function, Varchar, Varbinary>(
{prefix + "to_base64"});

registerFunction<FromBase64Function, Varbinary, Varchar>(
{prefix + "from_base64"});
registerFunction<FromBase64Function, Varbinary, Varbinary>(
{prefix + "from_base64"});

registerFunction<ToBase64UrlFunction, Varchar, Varbinary>(
{prefix + "to_base64url"});
registerFunction<FromBase64UrlFunction, Varbinary, Varchar>(
Expand Down
23 changes: 20 additions & 3 deletions velox/functions/prestosql/tests/BinaryFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,20 @@ TEST_F(BinaryFunctionsTest, toBase64Url) {

TEST_F(BinaryFunctionsTest, fromBase64) {
// Test helper: evaluates from_base64(c0) on `value` and returns the optional
// string result.
const auto fromBase64 = [&](std::optional<std::string> value) {
// NOTE(review): the next line is presumably the pre-commit body that this
// commit removed (the hunk is rendered without +/- markers); as shown here it
// would make everything after it unreachable. Confirm against the real file.
return evaluateOnce<std::string>("from_base64(c0)", value);
// from_base64 allows VARCHAR and VARBINARY inputs.
// Evaluate the same value once typed as VARCHAR and once typed as VARBINARY.
auto result =
evaluateOnce<std::string>("from_base64(c0)", VARCHAR(), value);
auto otherResult =
evaluateOnce<std::string>("from_base64(c0)", VARBINARY(), value);

// Both input types must agree on whether a result value is present.
VELOX_CHECK_EQ(result.has_value(), otherResult.has_value());

// No value to compare; return the empty VARCHAR-path result as is.
if (!result.has_value()) {
return result;
}

// Both input types must produce the same decoded value; the VARCHAR-path
// result is the one returned to the caller.
VELOX_CHECK_EQ(result.value(), otherResult.value());
return result;
};

EXPECT_EQ(std::nullopt, fromBase64(std::nullopt));
Expand All @@ -424,8 +437,12 @@ TEST_F(BinaryFunctionsTest, fromBase64) {
"Hello World from Velox!",
fromBase64("SGVsbG8gV29ybGQgZnJvbSBWZWxveCE="));

EXPECT_THROW(fromBase64("YQ="), VeloxUserError);
EXPECT_THROW(fromBase64("YQ==="), VeloxUserError);
VELOX_ASSERT_USER_THROW(
fromBase64("YQ="),
"Base64::decode() - invalid input string: string length is not a multiple of 4.");
VELOX_ASSERT_USER_THROW(
fromBase64("YQ==="),
"Base64::decode() - invalid input string: string length is not a multiple of 4.");

// Check encoded strings without padding
EXPECT_EQ("a", fromBase64("YQ"));
Expand Down

0 comments on commit 31800f5

Please sign in to comment.