From 01ecc1e87dfd5b639bd03fa02a7e25b0f85bb883 Mon Sep 17 00:00:00 2001 From: gouyikui Date: Fri, 12 Apr 2024 17:50:34 +0800 Subject: [PATCH] velox split function support pattern of string type --- velox/docs/functions/spark/string.rst | 11 +- velox/functions/lib/Re2Functions.cpp | 125 +++++++++++++++ velox/functions/lib/Re2Functions.h | 14 ++ .../functions/lib/tests/Re2FunctionsTest.cpp | 126 +++++++++++++++ velox/functions/sparksql/CMakeLists.txt | 1 - velox/functions/sparksql/Register.cpp | 3 +- velox/functions/sparksql/SplitFunctions.cpp | 143 ------------------ velox/functions/sparksql/tests/CMakeLists.txt | 1 - .../sparksql/tests/SplitFunctionsTest.cpp | 109 ------------- 9 files changed, 274 insertions(+), 259 deletions(-) delete mode 100644 velox/functions/sparksql/SplitFunctions.cpp delete mode 100644 velox/functions/sparksql/tests/SplitFunctionsTest.cpp diff --git a/velox/docs/functions/spark/string.rst b/velox/docs/functions/spark/string.rst index 817eaad34ab29..32042f816bf75 100644 --- a/velox/docs/functions/spark/string.rst +++ b/velox/docs/functions/spark/string.rst @@ -184,18 +184,21 @@ Unless specified otherwise, all functions return NULL if at least one of the arg SELECT rtrim('kr', 'spark'); -- "spa" -.. spark:function:: split(string, delimiter) -> array(string) +.. spark:function:: split(string, regex) -> array(string) - Splits ``string`` on ``delimiter`` and returns an array. :: + Splits ``string`` around every match of ``regex`` and returns the pieces as an array. + ``regex`` is a regular expression in RE2 syntax. + Equivalent to ``split(string, regex, -1)``, i.e. with no limit on the array size. :: SELECT split('oneAtwoBthreeC', '[ABC]'); -- ["one","two","three",""] SELECT split('one', ''); -- ["o", "n", "e", ""] SELECT split('one', '1'); -- ["one"] -.. spark:function:: split(string, delimiter, limit) -> array(string) +.. 
spark:function:: split(string, regex, limit) -> array(string) :noindex: - Splits ``string`` on ``delimiter`` and returns an array of size at most ``limit``. :: + Splits ``string`` on ``regex`` and returns an array of size at most ``limit``. + If limit is negative, ``string`` will be split as many times as possible. :: SELECT split('oneAtwoBthreeC', '[ABC]', -1); -- ["one","two","three",""] SELECT split('oneAtwoBthreeC', '[ABC]', 0); -- ["one", "two", "three", ""] diff --git a/velox/functions/lib/Re2Functions.cpp b/velox/functions/lib/Re2Functions.cpp index f250c8ddeffd5..30f3b00088ad9 100644 --- a/velox/functions/lib/Re2Functions.cpp +++ b/velox/functions/lib/Re2Functions.cpp @@ -1185,6 +1185,108 @@ class Re2ExtractAll final : public exec::VectorFunction { mutable ReCache cache_; }; +/// Splits the string at 'row' of 'inputStrs' on every match of 're' and writes +/// the pieces to 'resultWriter' as one array. Pieces are views into the input +/// string (no copy), so the caller must keep the input buffers alive. +/// +/// Follows java.lang.String#split with a negative limit: trailing empty +/// pieces are kept, and a zero-length match splits between characters without +/// producing an empty leading piece, e.g. 'one' split on '' gives +/// ["o", "n", "e", ""]. +void re2SplitAll( + exec::VectorWriter<Array<Varchar>>& resultWriter, + const RE2& re, + const exec::LocalDecodedVector& inputStrs, + const int row, + std::vector<re2::StringPiece>& groups) { + resultWriter.setOffset(row); + auto& arrayWriter = resultWriter.current(); + + const StringView str = inputStrs->valueAt<StringView>(row); + const re2::StringPiece input = toStringPiece(str); + const size_t inputSize = input.size(); + + // Start of the piece that the next delimiter will terminate. + size_t pieceStart = 0; + // Where the next search begins. Runs ahead of 'pieceStart' after a + // zero-length match so that the same match is not found again. + size_t searchPos = 0; + + while (searchPos <= inputSize && + re.Match( + input, searchPos, inputSize, RE2::UNANCHORED, groups.data(), 1)) { + const re2::StringPiece fullMatch = groups[0]; + const size_t matchStart = fullMatch.data() - input.data(); + const size_t matchEnd = matchStart + fullMatch.size(); + + if (UNLIKELY(fullMatch.empty())) { + // Step over one whole UTF-8 character so that the search makes progress + // without ever splitting inside a multi-byte code point. + searchPos = matchStart + 1; + while (searchPos < inputSize && + (static_cast<unsigned char>(input[searchPos]) & 0xC0) == 0x80) { + ++searchPos; + } + if (matchStart == 0) { + // A zero-length match at the very start yields no empty leading piece. + continue; + } + } else { + searchPos = matchEnd; + } + + arrayWriter.add_item().setNoCopy( + StringView(input.data() + pieceStart, matchStart - pieceStart)); + pieceStart = matchEnd; + } + + // The text after the last delimiter is always emitted, even when empty. + arrayWriter.add_item().setNoCopy( + StringView(input.data() + pieceStart, inputSize - pieceStart)); + + resultWriter.commit(); +} + +/// Vector function that splits each input string on a regular expression +/// supplied as a constant pattern. +class Re2SplitAllConstantPattern final : public exec::VectorFunction { + public: + 
explicit Re2SplitAllConstantPattern(StringView pattern) + : re_(toStringPiece(pattern), RE2::Quiet) { + checkForBadPattern(re_); + } + + void apply( + const SelectivityVector& rows, + std::vector& args, + const TypePtr& /* outputType */, + exec::EvalCtx& context, + VectorPtr& resultRef) const final { + BaseVector::ensureWritable( + rows, ARRAY(VARCHAR()), context.pool(), resultRef); + exec::VectorWriter> resultWriter; + resultWriter.init(*resultRef->as()); + + exec::LocalDecodedVector inputStrs(context, *args[0], rows); + FOLLY_DECLARE_REUSED(groups, std::vector); + groups.resize(1); // Only the whole match is read. + + context.applyToSelectedNoThrow(rows, [&](vector_size_t row) { + re2SplitAll(resultWriter, re_, inputStrs, row, groups); + }); + + resultWriter.finish(); + + resultRef->as() + ->elements() + ->asFlatVector() + ->acquireSharedStringBuffers(inputStrs->base()); + } + + private: + RE2 re_; // Compiled once, in the constructor. +}; + template std::shared_ptr makeRe2MatchImpl( const std::string& name, @@ -1935,4 +2012,53 @@ re2ExtractAllSignatures() { }; } +std::shared_ptr makeRe2SplitAll( + const std::string& name, + const std::vector& inputArgs, + const core::QueryConfig& /*config*/) { + auto numArgs = inputArgs.size(); + VELOX_USER_CHECK_EQ( + numArgs, 2, "{} requires 2 arguments, but got {}", name, numArgs); + + VELOX_USER_CHECK( + inputArgs[0].type->isVarchar(), + "{} requires first argument of type VARCHAR, but got {}", + name, + inputArgs[0].type->toString()); + + VELOX_USER_CHECK( + inputArgs[1].type->isVarchar(), + "{} requires second argument of type VARCHAR, but got {}", + name, + inputArgs[1].type->toString()); + + // The pattern must be a constant so that it can be compiled once, up front. + BaseVector* constantPattern = inputArgs[1].constantValue.get(); + VELOX_USER_CHECK( + constantPattern != nullptr && !constantPattern->isNullAt(0), + "{} requires a non-null constant pattern as second argument", + name); + + auto pattern = constantPattern->as>()->valueAt(0); + + // Compiling the pattern may throw; the catch below captures the exception. + try { + return std::make_shared(pattern); + } catch (...) 
{ + return std::make_shared( + std::current_exception()); + } +} + +std::vector> re2SplitAllSignatures() { + // varchar, constant varchar -> array(varchar) + return { + exec::FunctionSignatureBuilder() + .returnType("array(varchar)") + .argumentType("varchar") + .constantArgumentType("varchar") + .build(), + }; +} + } // namespace facebook::velox::functions diff --git a/velox/functions/lib/Re2Functions.h b/velox/functions/lib/Re2Functions.h index 9ce5e17903e05..eccc243945caa 100644 --- a/velox/functions/lib/Re2Functions.h +++ b/velox/functions/lib/Re2Functions.h @@ -237,6 +237,20 @@ std::shared_ptr makeRe2ExtractAll( std::vector> re2ExtractAllSignatures(); +/// re2SplitAll(string, pattern) → array(varchar) +/// +/// Splits string around every match of the regular expression pattern and +/// returns the pieces as an array. +/// +/// Fails if the pattern is not a valid regular expression or not a constant. +/// If the pattern never matches, the array holds only the original string. +std::shared_ptr makeRe2SplitAll( + const std::string& name, + const std::vector& inputArgs, + const core::QueryConfig& config); + +std::vector> re2SplitAllSignatures(); + /// regexp_replace(string, pattern, replacement) -> string /// regexp_replace(string, pattern) -> string /// diff --git a/velox/functions/lib/tests/Re2FunctionsTest.cpp b/velox/functions/lib/tests/Re2FunctionsTest.cpp index bb7503d74b5fb..808c302083fad 100644 --- a/velox/functions/lib/tests/Re2FunctionsTest.cpp +++ b/velox/functions/lib/tests/Re2FunctionsTest.cpp @@ -57,6 +57,8 @@ class Re2FunctionsTest : public test::FunctionBaseTest { exec::registerStatefulVectorFunction( "re2_extract_all", re2ExtractAllSignatures(), makeRe2ExtractAll); exec::registerStatefulVectorFunction("like", likeSignatures(), makeLike); + exec::registerStatefulVectorFunction( + "re2_split_all", re2SplitAllSignatures(), makeRe2SplitAll); } protected: @@ -85,6 +87,11 @@ class Re2FunctionsTest : public test::FunctionBaseTest { return output; } + void testRe2SplitAll( + const std::vector>& 
inputs, + const std::string& pattern, + const std::vector>>& output); + void testLike( const std::string& input, const std::string& pattern, @@ -1471,5 +1478,128 @@ TEST_F(Re2FunctionsTest, limit) { ASSERT_NO_THROW(evaluate("regexp_like(c0, c2)", data)); } +// Evaluates re2_split_all(c0, '<pattern>') over 'inputs' and checks that row i +// of the result equals output[i]; std::nullopt stands for a null row. +// 'pattern' is spliced into the expression text verbatim, so it must not +// contain a single quote. +void Re2FunctionsTest::testRe2SplitAll( + const std::vector>& inputs, + const std::string& pattern, + const std::vector>>& output) { + auto result = [&] { + auto input = makeFlatVector( + inputs.size(), + [&inputs](vector_size_t row) { + return inputs[row] ? StringView(*inputs[row]) : StringView(); + }, + [&inputs](vector_size_t row) { return !inputs[row].has_value(); }); + + // The pattern is passed as a string literal, i.e. a constant. + std::string constantPattern = std::string(", '") + pattern + "'"; + std::string expression = + std::string("re2_split_all(c0") + constantPattern + ")"; + return evaluate(expression, makeRowVector({input})); + }(); + + // Build the expected array vector from 'output'. + auto sizeAtOutput = [&output](vector_size_t row) { + return output[row] ? output[row]->size() : 0; + }; + auto valueAtOutput = [&output](vector_size_t row, vector_size_t idx) { + return output[row] ? StringView(output[row]->at(idx)) : StringView(""); + }; + auto nullAtOutput = [&output](vector_size_t row) { + return !output[row].has_value(); + }; + auto expectedResult = makeArrayVector( + output.size(), sizeAtOutput, valueAtOutput, nullAtOutput); + + // Compare expected and actual results. + assertEqualVectors(expectedResult, result); +} + +TEST_F(Re2FunctionsTest, regexSplitAllSingleCharPattern) { + // Literal '_' delimiter. + testRe2SplitAll({"abc_ta"}, {"_"}, {{{"abc", "ta"}}}); + testRe2SplitAll({"_abc_ta_"}, {"_"}, {{{"", "abc", "ta", ""}}}); + testRe2SplitAll({"abc_ta "}, {"_"}, {{{"abc", "ta "}}}); + testRe2SplitAll({" abc_ta "}, {"_"}, {{{" abc", "ta "}}}); + + // '.' matches every character, so all pieces are empty. + testRe2SplitAll({"abc"}, {"."}, {{{"", "", "", ""}}}); + testRe2SplitAll({"abc "}, {"."}, {{{"", "", "", "", ""}}}); + testRe2SplitAll({" abc "}, {"."}, {{{"", "", "", "", "", ""}}}); + + // Escaped '.' matches only a literal dot. 
+ testRe2SplitAll({"abc"}, {"\\."}, {{{"abc"}}}); + testRe2SplitAll({"abc "}, {"\\."}, {{{"abc "}}}); + testRe2SplitAll({" abc "}, {"\\."}, {{{" abc "}}}); + + // Escaped '|' matches a literal pipe. + testRe2SplitAll({"abt|sc"}, {"\\|"}, {{{"abt", "sc"}}}); + testRe2SplitAll({"|abc| "}, {"\\|"}, {{{"", "abc", " "}}}); + testRe2SplitAll({" |ab|c | "}, {"\\|"}, {{{" ", "ab", "c ", " "}}}); +} + +TEST_F(Re2FunctionsTest, regexSplitAllSequenceCharPattern) { + testRe2SplitAll({"dafefaatb"}, {"fe"}, {{{"da", "faatb"}}}); + testRe2SplitAll({"abc_ta"}, {"abc_ta_t"}, {{{"abc_ta"}}}); + testRe2SplitAll({"abc dt dat"}, {" dt"}, {{{"abc", " dat"}}}); + + testRe2SplitAll({"absdfghabiefjab"}, {"ab"}, {{{"", "sdfgh", "iefj", ""}}}); + testRe2SplitAll( + {" absdfgha biefjab "}, {"ab"}, {{{" ", "sdfgha biefj", " "}}}); +} + +TEST_F(Re2FunctionsTest, regexSplitAllRegexSequencePattern) { + const std::vector> inputs = { + " 123a 2b 14m ", "123a 2b 14m", "123a2b14m"}; + const std::string constantPattern = "(\\d+)([a-z]+)"; + const std::vector>> expectedOutputs = { + {{" ", " ", " ", " "}}, + {{"", " ", " ", ""}}, + {{"", "", "", ""}}}; + + testRe2SplitAll(inputs, constantPattern, expectedOutputs); + + testRe2SplitAll({"aa2bb3cc4"}, {"[1-9]+"}, {{{"aa", "bb", "cc", ""}}}); + testRe2SplitAll({""}, {"[0-9]+"}, {{{""}}}); + testRe2SplitAll({"abcde"}, {"[0-9]+"}, {{{"abcde"}}}); + testRe2SplitAll({"abcde"}, {"\\d+"}, {{{"abcde"}}}); + testRe2SplitAll({"23544"}, {"\\w+"}, {{{"", ""}}}); + testRe2SplitAll({"(╯°□°)╯︵ ┻━┻"}, {"[0-9]+"}, {{{"(╯°□°)╯︵ ┻━┻"}}}); +} + +TEST_F(Re2FunctionsTest, regexSplitAllNonAscii) { + testRe2SplitAll( + {"\u82f9\u679c\u9999\u8549\u0076\u0065\u006c\u006f\u0078\u6a58\u5b50"}, + {"\u9999\u8549"}, + {{{"\u82f9\u679c", "\u0076\u0065\u006c\u006f\u0078\u6a58\u5b50"}}}); + + testRe2SplitAll( + {"\u82f9\u679c\u9999\u8549\u0076\u0065\u006c\u006f\u0078\u6a58\u5b50"}, + {"\u0076\u0065\u006c\u006f\u0078"}, + {{{"\u82f9\u679c\u9999\u8549", "\u6a58\u5b50"}}}); + + testRe2SplitAll( + 
{"\u6d4b\u8bd5\u0076\u0065\u006c\u006f\u0078"}, + {"velox"}, + {{{"\u6d4b\u8bd5", ""}}}); + + testRe2SplitAll( + {"\u6d4b\u8bd5\u0076\u0065\u006c\u006f\u0078\u0020"}, + {"velox"}, + {{{"\u6d4b\u8bd5", " "}}}); + + testRe2SplitAll( + {"\u0076\u0065\u006c\u006f\u0078\u6d4b\u8bd5"}, + {"\u6d4b\u8bd5"}, + {{{"velox", ""}}}); + + testRe2SplitAll({"苹果香蕉velox橘子 "}, {"velox"}, {{{"苹果香蕉", "橘子 "}}}); + + testRe2SplitAll({"苹果香蕉velox橘子 "}, {"橘子"}, {{{"苹果香蕉velox", " "}}}); +} + } // namespace } // namespace facebook::velox::functions diff --git a/velox/functions/sparksql/CMakeLists.txt b/velox/functions/sparksql/CMakeLists.txt index 6ba9c03c6c790..09adf88a1df55 100644 --- a/velox/functions/sparksql/CMakeLists.txt +++ b/velox/functions/sparksql/CMakeLists.txt @@ -30,7 +30,6 @@ add_library( RegisterArithmetic.cpp RegisterCompare.cpp Size.cpp - SplitFunctions.cpp String.cpp UnscaledValueFunction.cpp) diff --git a/velox/functions/sparksql/Register.cpp b/velox/functions/sparksql/Register.cpp index 72a8ca5275684..b41a2ad16f1cf 100644 --- a/velox/functions/sparksql/Register.cpp +++ b/velox/functions/sparksql/Register.cpp @@ -227,7 +227,8 @@ void registerFunctions(const std::string& prefix) { prefix + "regexp_extract", re2ExtractSignatures(), makeRegexExtract); exec::registerStatefulVectorFunction( prefix + "rlike", re2SearchSignatures(), makeRLike); - VELOX_REGISTER_VECTOR_FUNCTION(udf_regexp_split, prefix + "split"); + exec::registerStatefulVectorFunction( + prefix + "split", re2SplitAllSignatures(), makeRe2SplitAll); exec::registerStatefulVectorFunction( prefix + "least", diff --git a/velox/functions/sparksql/SplitFunctions.cpp b/velox/functions/sparksql/SplitFunctions.cpp deleted file mode 100644 index 4d092e6928373..0000000000000 --- a/velox/functions/sparksql/SplitFunctions.cpp +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include "velox/expression/VectorFunction.h" -#include "velox/expression/VectorWriters.h" - -namespace facebook::velox::functions::sparksql { -namespace { - -/// This class only implements the basic split version in which the pattern is a -/// single character -class SplitCharacter final : public exec::VectorFunction { - public: - explicit SplitCharacter(const char pattern) : pattern_{pattern} { - static constexpr std::string_view kRegexChars = ".$|()[{^?*+\\"; - VELOX_CHECK( - kRegexChars.find(pattern) == std::string::npos, - "This version of split supports single-length non-regex patterns"); - } - - void apply( - const SelectivityVector& rows, - std::vector& args, - const TypePtr& /* outputType */, - exec::EvalCtx& context, - VectorPtr& result) const override { - exec::LocalDecodedVector input(context, *args[0], rows); - - BaseVector::ensureWritable(rows, ARRAY(VARCHAR()), context.pool(), result); - exec::VectorWriter> resultWriter; - resultWriter.init(*result->as()); - - rows.applyToSelected([&](vector_size_t row) { - resultWriter.setOffset(row); - auto& arrayWriter = resultWriter.current(); - - const StringView& current = input->valueAt(row); - const char* pos = current.begin(); - const char* end = pos + current.size(); - const char* delim; - do { - delim = std::find(pos, end, pattern_); - arrayWriter.add_item().setNoCopy(StringView(pos, delim - pos)); - pos = delim + 1; // Skip past 
delim. - } while (delim != end); - - resultWriter.commit(); - }); - - resultWriter.finish(); - - // Reference the input StringBuffers since we did not deep copy above. - result->as() - ->elements() - ->as>() - ->acquireSharedStringBuffers(args[0].get()); - } - - private: - const char pattern_; -}; - -/// This class will be updated in the future as we support more variants of -/// split -class Split final : public exec::VectorFunction { - public: - Split() {} - - void apply( - const SelectivityVector& rows, - std::vector& args, - const TypePtr& /* outputType */, - exec::EvalCtx& context, - VectorPtr& result) const override { - auto delimiterVector = args[1]->as>(); - VELOX_CHECK( - delimiterVector, "Split function supports only constant delimiter"); - auto patternString = args[1]->as>()->valueAt(0); - VELOX_CHECK_EQ( - patternString.size(), - 1, - "split only supports only single-character pattern"); - char pattern = patternString.data()[0]; - SplitCharacter splitCharacter(pattern); - splitCharacter.apply(rows, args, nullptr, context, result); - } -}; - -/// The function returns specialized version of split based on the constant -/// inputs. -/// \param inputArgs the inputs types (VARCHAR, VARCHAR, int64) and constant -/// values (if provided). 
-std::shared_ptr createSplit( - const std::string& /*name*/, - const std::vector& inputArgs, - const core::QueryConfig& /*config*/) { - BaseVector* constantPattern = inputArgs[1].constantValue.get(); - - if (inputArgs.size() > 3 || inputArgs[0].type->isVarchar() || - inputArgs[1].type->isVarchar() || (constantPattern == nullptr)) { - return std::make_shared(); - } - auto pattern = constantPattern->as>()->valueAt(0); - if (pattern.size() != 1) { - return std::make_shared(); - } - char charPattern = pattern.data()[0]; - // TODO: Add support for zero-length pattern, 2-character pattern - // TODO: add support for general regex pattern using R2 - return std::make_shared(charPattern); -} - -std::vector> signatures() { - // varchar, varchar -> array(varchar) - return {exec::FunctionSignatureBuilder() - .returnType("array(varchar)") - .argumentType("varchar") - .constantArgumentType("varchar") - .build()}; -} - -} // namespace - -VELOX_DECLARE_STATEFUL_VECTOR_FUNCTION( - udf_regexp_split, - signatures(), - createSplit); -} // namespace facebook::velox::functions::sparksql diff --git a/velox/functions/sparksql/tests/CMakeLists.txt b/velox/functions/sparksql/tests/CMakeLists.txt index e4881db973068..6723d552b3a9b 100644 --- a/velox/functions/sparksql/tests/CMakeLists.txt +++ b/velox/functions/sparksql/tests/CMakeLists.txt @@ -40,7 +40,6 @@ add_executable( SortArrayTest.cpp SparkCastExprTest.cpp SparkPartitionIdTest.cpp - SplitFunctionsTest.cpp StringTest.cpp StringToMapTest.cpp UnscaledValueFunctionTest.cpp diff --git a/velox/functions/sparksql/tests/SplitFunctionsTest.cpp b/velox/functions/sparksql/tests/SplitFunctionsTest.cpp deleted file mode 100644 index 8928849a44ce2..0000000000000 --- a/velox/functions/sparksql/tests/SplitFunctionsTest.cpp +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include "velox/functions/sparksql/tests/SparkFunctionBaseTest.h" - -namespace facebook::velox::functions::sparksql::test { - -using namespace facebook::velox::test; -namespace { - -class SplitTest : public SparkFunctionBaseTest { - protected: - void testSplitCharacter( - const std::vector>& input, - std::optional pattern, - const std::vector>>& output); -}; - -void SplitTest::testSplitCharacter( - const std::vector>& input, - std::optional pattern, - const std::vector>>& output) { - auto valueAt = [&input](vector_size_t row) { - return input[row] ? StringView(*input[row]) : StringView(); - }; - - // Creating vectors for input strings - auto nullAt = [&input](vector_size_t row) { return !input[row].has_value(); }; - - auto result = [&] { - auto inputString = - makeFlatVector(input.size(), valueAt, nullAt); - auto rowVector = makeRowVector({inputString}); - - // Evaluating the function for each input and seed - std::string patternString = - pattern.has_value() ? std::string(", '") + pattern.value() + "'" : ""; - std::string expressionString = - std::string("split(c0") + patternString + ")"; - return evaluate(expressionString, rowVector); - }(); - - // Creating vectors for output string vectors - auto sizeAtOutput = [&output](vector_size_t row) { - return output[row] ? output[row]->size() : 0; - }; - auto valueAtOutput = [&output](vector_size_t row, vector_size_t idx) { - return output[row] ? 
StringView(output[row]->at(idx)) : StringView(""); - }; - auto nullAtOutput = [&output](vector_size_t row) { - return !output[row].has_value(); - }; - auto expectedResult = makeArrayVector( - output.size(), sizeAtOutput, valueAtOutput, nullAtOutput); - - // Checking the results - assertEqualVectors(expectedResult, result); -} - -TEST_F(SplitTest, reallocationAndCornerCases) { - testSplitCharacter( - {"boo:and:foo", "abcfd", "abcfd:", "", ":ab::cfd::::"}, - ':', - {{{"boo", "and", "foo"}}, - {{"abcfd"}}, - {{"abcfd", ""}}, - {{""}}, - {{"", "ab", "", "cfd", "", "", "", ""}}}); -} - -TEST_F(SplitTest, nulls) { - testSplitCharacter( - {std::nullopt, "abcfd", "abcfd:", std::nullopt, ":ab::cfd::::"}, - ':', - {{std::nullopt}, - {{"abcfd"}}, - {{"abcfd", ""}}, - {{std::nullopt}}, - {{"", "ab", "", "cfd", "", "", "", ""}}}); -} - -TEST_F(SplitTest, defaultArguments) { - testSplitCharacter( - {"boo:and:foo", "abcfd"}, ':', {{{"boo", "and", "foo"}}, {{"abcfd"}}}); -} - -TEST_F(SplitTest, longStrings) { - testSplitCharacter( - {"abcdefghijklkmnopqrstuvwxyz"}, - ',', - {{{"abcdefghijklkmnopqrstuvwxyz"}}}); -} - -} // namespace -} // namespace facebook::velox::functions::sparksql::test