From 86467e0737f3d3f991b2b5d4c8eed237b3efd3b9 Mon Sep 17 00:00:00 2001 From: Daniel Hunte Date: Thu, 8 Aug 2024 16:36:48 -0700 Subject: [PATCH] Make split_to_map throw when more than one delimiter is found Summary: Presto throws when there are more than one delimiter so Velox should do the same. Differential Revision: D60995768 --- velox/functions/prestosql/SplitToMap.h | 9 +++-- .../prestosql/tests/SplitToMapTest.cpp | 38 ++++++++++++------- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/velox/functions/prestosql/SplitToMap.h b/velox/functions/prestosql/SplitToMap.h index 8379d822d3d83..7dc7be39e514e 100644 --- a/velox/functions/prestosql/SplitToMap.h +++ b/velox/functions/prestosql/SplitToMap.h @@ -88,11 +88,9 @@ struct SplitToMapFunction { Status::UserError( "entryDelimiter and keyValueDelimiter must not be the same: {}", entryDelimiter)); - if (input.empty()) { return Status::OK(); } - size_t pos = 0; folly::F14FastMap keyValuePairs; @@ -140,7 +138,12 @@ struct SplitToMapFunction { VELOX_RETURN_IF( delimiterPos == std::string::npos, Status::UserError( - "Key-value delimiter must appear exactly once in each entry. Bad input: '{}'", + "No delimiter found. Key-value delimiter must appear exactly once in each entry. Bad input: '{}'", + entry)); + VELOX_RETURN_IF( + entry.find(keyValueDelimiter, delimiterPos + 1) != std::string::npos, + Status::UserError( + "More than one delimiter found. Key-value delimiter must appear exactly once in each entry. Bad input: '{}'", entry)); const auto key = std::string_view(entry.data(), delimiterPos); diff --git a/velox/functions/prestosql/tests/SplitToMapTest.cpp b/velox/functions/prestosql/tests/SplitToMapTest.cpp index 5034f4f8b68a4..0f34d294a5a58 100644 --- a/velox/functions/prestosql/tests/SplitToMapTest.cpp +++ b/velox/functions/prestosql/tests/SplitToMapTest.cpp @@ -88,7 +88,8 @@ TEST_F(SplitToMapTest, invalidInput) { }); auto splitToMap = [&](const std::string& entryDelimiter, - const std::string& keyValueDelimiter) { + const std::string& keyValueDelimiter, + RowVectorPtr data) { evaluate( fmt::format( "split_to_map(c0, '{}', '{}')", entryDelimiter, keyValueDelimiter), @@ -96,7 +97,8 @@ TEST_F(SplitToMapTest, invalidInput) { }; auto trySplitToMap = [&](const std::string& entryDelimiter, - const std::string& keyValueDelimiter) { + const std::string& keyValueDelimiter, + RowVectorPtr data) { SCOPED_TRACE(fmt::format("{} {}", entryDelimiter, keyValueDelimiter)); auto result = evaluate( fmt::format( @@ -109,24 +111,34 @@ TEST_F(SplitToMapTest, invalidInput) { }; VELOX_ASSERT_THROW( - splitToMap(".", "."), + splitToMap(".", ".", data), "entryDelimiter and keyValueDelimiter must not be the same"); - trySplitToMap(".", "."); + trySplitToMap(".", ".", data); - VELOX_ASSERT_THROW(splitToMap(".", ""), "keyValueDelimiter is empty"); - trySplitToMap(".", ""); + VELOX_ASSERT_THROW(splitToMap(".", "", data), "keyValueDelimiter is empty"); + trySplitToMap(".", "", data); - VELOX_ASSERT_THROW(splitToMap("", "."), "entryDelimiter is empty"); - trySplitToMap("", "."); + VELOX_ASSERT_THROW(splitToMap("", ".", data), "entryDelimiter is empty"); + trySplitToMap("", ".", data); VELOX_ASSERT_THROW( - splitToMap(":", ","), - "Key-value delimiter must appear exactly once in each entry. Bad input: '1'"); - trySplitToMap(":", ","); + splitToMap(":", ",", data), + "No delimiter found. Key-value delimiter must appear exactly once in each entry. Bad input: '1'"); + trySplitToMap(":", ",", data); VELOX_ASSERT_THROW( - splitToMap(",", ":"), "Duplicate keys (1) are not allowed."); - trySplitToMap(",", ":"); + splitToMap(",", ":", data), "Duplicate keys (1) are not allowed."); + trySplitToMap(",", ":", data); + + data = makeRowVector({ + makeFlatVector({ + "1::10,2:20,1:30", + }), + }); + VELOX_ASSERT_THROW( + splitToMap(",", ":", data), + "More than one delimiter found. Key-value delimiter must appear exactly once in each entry"); + trySplitToMap(",", ":", data); } TEST_F(SplitToMapTest, lambda) {