Skip to content

Commit

Permalink
Make split_to_map throw when more than one delimiter is found (facebo…
Browse files Browse the repository at this point in the history
…okincubator#10691)

Summary:
Pull Request resolved: facebookincubator#10691

Presto throws when there is more than one delimiter so Velox should do the same.

Reviewed By: gggrace14

Differential Revision: D60995768
  • Loading branch information
Daniel Hunte authored and facebook-github-bot committed Aug 9, 2024
1 parent 1c538f2 commit 484fde6
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 14 deletions.
7 changes: 6 additions & 1 deletion velox/functions/prestosql/SplitToMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,12 @@ struct SplitToMapFunction {
VELOX_RETURN_IF(
delimiterPos == std::string::npos,
Status::UserError(
"Key-value delimiter must appear exactly once in each entry. Bad input: '{}'",
"No delimiter found. Key-value delimiter must appear exactly once in each entry. Bad input: '{}'",
entry));
VELOX_RETURN_IF(
entry.find(keyValueDelimiter, delimiterPos + 1) != std::string::npos,
Status::UserError(
"More than one delimiter found. Key-value delimiter must appear exactly once in each entry. Bad input: '{}'",
entry));

const auto key = std::string_view(entry.data(), delimiterPos);
Expand Down
38 changes: 25 additions & 13 deletions velox/functions/prestosql/tests/SplitToMapTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,17 @@ TEST_F(SplitToMapTest, invalidInput) {
});

auto splitToMap = [&](const std::string& entryDelimiter,
const std::string& keyValueDelimiter) {
const std::string& keyValueDelimiter,
RowVectorPtr data) {
evaluate(
fmt::format(
"split_to_map(c0, '{}', '{}')", entryDelimiter, keyValueDelimiter),
data);
};

auto trySplitToMap = [&](const std::string& entryDelimiter,
const std::string& keyValueDelimiter) {
const std::string& keyValueDelimiter,
const RowVectorPtr& data) {
SCOPED_TRACE(fmt::format("{} {}", entryDelimiter, keyValueDelimiter));
auto result = evaluate(
fmt::format(
Expand All @@ -109,24 +111,34 @@ TEST_F(SplitToMapTest, invalidInput) {
};

VELOX_ASSERT_THROW(
splitToMap(".", "."),
splitToMap(".", ".", data),
"entryDelimiter and keyValueDelimiter must not be the same");
trySplitToMap(".", ".");
trySplitToMap(".", ".", data);

VELOX_ASSERT_THROW(splitToMap(".", ""), "keyValueDelimiter is empty");
trySplitToMap(".", "");
VELOX_ASSERT_THROW(splitToMap(".", "", data), "keyValueDelimiter is empty");
trySplitToMap(".", "", data);

VELOX_ASSERT_THROW(splitToMap("", "."), "entryDelimiter is empty");
trySplitToMap("", ".");
VELOX_ASSERT_THROW(splitToMap("", ".", data), "entryDelimiter is empty");
trySplitToMap("", ".", data);

VELOX_ASSERT_THROW(
splitToMap(":", ","),
"Key-value delimiter must appear exactly once in each entry. Bad input: '1'");
trySplitToMap(":", ",");
splitToMap(":", ",", data),
"No delimiter found. Key-value delimiter must appear exactly once in each entry. Bad input: '1'");
trySplitToMap(":", ",", data);

VELOX_ASSERT_THROW(
splitToMap(",", ":"), "Duplicate keys (1) are not allowed.");
trySplitToMap(",", ":");
splitToMap(",", ":", data), "Duplicate keys (1) are not allowed.");
trySplitToMap(",", ":", data);

data = makeRowVector({
makeFlatVector<std::string>({
"1::10,2:20,1:30",
}),
});
VELOX_ASSERT_THROW(
splitToMap(",", ":", data),
"More than one delimiter found. Key-value delimiter must appear exactly once in each entry");
trySplitToMap(",", ":", data);
}

TEST_F(SplitToMapTest, lambda) {
Expand Down

0 comments on commit 484fde6

Please sign in to comment.