From 172fef24e3fb9d180aa3c2cd9a51b08d102b35cd Mon Sep 17 00:00:00 2001 From: Daniel Hunte Date: Fri, 9 Aug 2024 10:02:59 -0700 Subject: [PATCH] Make split_to_map throw when more than one delimiter is found (#10691) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/10691 Presto throws when there is more than one delimiter so Velox should do the same. Reviewed By: gggrace14 Differential Revision: D60995768 fbshipit-source-id: 8411777d4672118f0a3f07080b6fccdd82ff7dc0 --- velox/functions/prestosql/SplitToMap.h | 7 +++- .../prestosql/tests/SplitToMapTest.cpp | 38 ++++++++++++------- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/velox/functions/prestosql/SplitToMap.h b/velox/functions/prestosql/SplitToMap.h index 8379d822d3d8..3dd438153e2c 100644 --- a/velox/functions/prestosql/SplitToMap.h +++ b/velox/functions/prestosql/SplitToMap.h @@ -140,7 +140,12 @@ struct SplitToMapFunction { VELOX_RETURN_IF( delimiterPos == std::string::npos, Status::UserError( - "Key-value delimiter must appear exactly once in each entry. Bad input: '{}'", + "No delimiter found. Key-value delimiter must appear exactly once in each entry. Bad input: '{}'", + entry)); + VELOX_RETURN_IF( + entry.find(keyValueDelimiter, delimiterPos + 1) != std::string::npos, + Status::UserError( + "More than one delimiter found. Key-value delimiter must appear exactly once in each entry. Bad input: '{}'", entry)); const auto key = std::string_view(entry.data(), delimiterPos); diff --git a/velox/functions/prestosql/tests/SplitToMapTest.cpp b/velox/functions/prestosql/tests/SplitToMapTest.cpp index 5034f4f8b68a..1e9ba010913a 100644 --- a/velox/functions/prestosql/tests/SplitToMapTest.cpp +++ b/velox/functions/prestosql/tests/SplitToMapTest.cpp @@ -88,7 +88,8 @@ TEST_F(SplitToMapTest, invalidInput) { }); auto splitToMap = [&](const std::string& entryDelimiter, - const std::string& keyValueDelimiter) { + const std::string& keyValueDelimiter, + RowVectorPtr data) { evaluate( fmt::format( "split_to_map(c0, '{}', '{}')", entryDelimiter, keyValueDelimiter), @@ -96,7 +97,8 @@ TEST_F(SplitToMapTest, invalidInput) { }; auto trySplitToMap = [&](const std::string& entryDelimiter, - const std::string& keyValueDelimiter) { + const std::string& keyValueDelimiter, + const RowVectorPtr& data) { SCOPED_TRACE(fmt::format("{} {}", entryDelimiter, keyValueDelimiter)); auto result = evaluate( fmt::format( @@ -109,24 +111,34 @@ TEST_F(SplitToMapTest, invalidInput) { }; VELOX_ASSERT_THROW( - splitToMap(".", "."), + splitToMap(".", ".", data), "entryDelimiter and keyValueDelimiter must not be the same"); - trySplitToMap(".", "."); + trySplitToMap(".", ".", data); - VELOX_ASSERT_THROW(splitToMap(".", ""), "keyValueDelimiter is empty"); - trySplitToMap(".", ""); + VELOX_ASSERT_THROW(splitToMap(".", "", data), "keyValueDelimiter is empty"); + trySplitToMap(".", "", data); - VELOX_ASSERT_THROW(splitToMap("", "."), "entryDelimiter is empty"); - trySplitToMap("", "."); + VELOX_ASSERT_THROW(splitToMap("", ".", data), "entryDelimiter is empty"); + trySplitToMap("", ".", data); VELOX_ASSERT_THROW( - splitToMap(":", ","), - "Key-value delimiter must appear exactly once in each entry. Bad input: '1'"); - trySplitToMap(":", ","); + splitToMap(":", ",", data), + "No delimiter found. Key-value delimiter must appear exactly once in each entry. Bad input: '1'"); + trySplitToMap(":", ",", data); VELOX_ASSERT_THROW( - splitToMap(",", ":"), "Duplicate keys (1) are not allowed."); - trySplitToMap(",", ":"); + splitToMap(",", ":", data), "Duplicate keys (1) are not allowed."); + trySplitToMap(",", ":", data); + + data = makeRowVector({ + makeFlatVector({ + "1::10,2:20,1:30", + }), + }); + VELOX_ASSERT_THROW( + splitToMap(",", ":", data), + "More than one delimiter found. Key-value delimiter must appear exactly once in each entry"); + trySplitToMap(",", ":", data); } TEST_F(SplitToMapTest, lambda) {