Skip to content

Commit

Permalink
Fix url_extract_path Presto function (facebookincubator#6657)
Browse files Browse the repository at this point in the history
Summary:
This function needs to unescape the path after extraction.

Pull Request resolved: facebookincubator#6657

Reviewed By: kgpai

Differential Revision: D49471136

Pulled By: mbasmanova

fbshipit-source-id: 95d6479a8ec0a895dacc573d7983c14d9a449c57
  • Loading branch information
mbasmanova authored and codyschierbeck committed Sep 27, 2023
1 parent fd8c0a1 commit 62bce19
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 23 deletions.
18 changes: 7 additions & 11 deletions velox/functions/prestosql/URLFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -242,19 +242,15 @@ template <typename T>
struct UrlExtractPathFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

// Results refer to strings in the first argument.
static constexpr int32_t reuse_strings_from_arg = 0;

// ASCII input always produces ASCII result.
static constexpr bool is_default_ascii_behavior = true;
// Input is always ASCII, but result may or may not be ASCII.

FOLLY_ALWAYS_INLINE bool call(
FOLLY_ALWAYS_INLINE void call(
out_type<Varchar>& result,
const arg_type<Varchar>& url) {
boost::cmatch match;
if (!parse(url, match)) {
result.setEmpty();
return true;
return;
}

boost::cmatch authAndPathMatch;
Expand All @@ -263,14 +259,14 @@ struct UrlExtractPathFunction {

if (matchAuthorityAndPath(
match, authAndPathMatch, authorityMatch, hasAuthority)) {
StringView escapedPath;
if (hasAuthority) {
result.setNoCopy(submatch(authAndPathMatch, 2));
escapedPath = submatch(authAndPathMatch, 2);
} else {
result.setNoCopy(submatch(match, 2));
escapedPath = submatch(match, 2);
}
urlUnescape(result, escapedPath);
}

return true;
}
};

Expand Down
34 changes: 22 additions & 12 deletions velox/functions/prestosql/tests/URLFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,26 @@
#include <gmock/gmock.h>
#include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h"

using string_t = std::string;

namespace facebook::velox {

namespace {
class URLFunctionsTest : public functions::test::FunctionBaseTest {
protected:
void validate(
const string_t& url,
const string_t& expectedProtocol,
const string_t& expectedHost,
const string_t& expectedPath,
const string_t& expectedFragment,
const string_t& expectedQuery,
const std::string& url,
const std::string& expectedProtocol,
const std::string& expectedHost,
const std::string& expectedPath,
const std::string& expectedFragment,
const std::string& expectedQuery,
const std::optional<int32_t> expectedPort) {
const auto extractFn = [&](const string_t& fn,
const std::optional<string_t>& a) {
return evaluateOnce<string_t>(fmt::format("url_extract_{}(c0)", fn), a);
const auto extractFn = [&](const std::string& fn,
const std::optional<std::string>& a) {
return evaluateOnce<std::string>(
fmt::format("url_extract_{}(c0)", fn), a);
};

const auto extractPort = [&](const std::optional<string_t>& a) {
const auto extractPort = [&](const std::optional<std::string>& a) {
return evaluateOnce<int64_t>("url_extract_port(c0)", a);
};

Expand Down Expand Up @@ -101,6 +100,17 @@ TEST_F(URLFunctionsTest, validateURL) {
validate("foo", "", "", "", "", "", std::nullopt);
}

TEST_F(URLFunctionsTest, extractPath) {
const auto extractPath = [&](const std::optional<std::string>& url) {
return evaluateOnce<std::string>("url_extract_path(c0)", url);
};

ASSERT_EQ(
"/media/set/Books and Magazines.php",
extractPath(
"https://www.cnn.com/media/set/Books%20and%20Magazines.php?foo=bar"));
}

TEST_F(URLFunctionsTest, extractParameter) {
const auto extractParam = [&](const std::optional<std::string>& a,
const std::optional<std::string>& b) {
Expand Down

0 comments on commit 62bce19

Please sign in to comment.