From fcc084d3581ba9b76b4f18031df3739ec24bdc8d Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sat, 28 Sep 2024 23:12:10 +0800 Subject: [PATCH 1/4] add more function for string-related type --- .../test_files/string/string_query.slt.part | 282 ++++++++++++++++++ 1 file changed, 282 insertions(+) diff --git a/datafusion/sqllogictest/test_files/string/string_query.slt.part b/datafusion/sqllogictest/test_files/string/string_query.slt.part index 96d5ddbd992c..de99717e6a96 100644 --- a/datafusion/sqllogictest/test_files/string/string_query.slt.part +++ b/datafusion/sqllogictest/test_files/string/string_query.slt.part @@ -694,3 +694,285 @@ Andrew nice Andrew and X datafusion📊🔥 cool datafusion📊🔥 and 🔥 And Xiangpeng nice Xiangpeng and Xiangpeng datafusion数据融合 cool datafusion数据融合 and datafusion数据融合 Xiangpeng 🔥 datafusion数据融合 Raphael nice Raphael and R datafusionДатаФусион cool datafusionДатаФусион and аФус Raphael 🔥 datafusionДатаФусион NULL NULL NULL NULL NULL + +# -------------------------------------- +# Test LIKE / ILIKE +# -------------------------------------- + +# TODO: StringView has wrong behavior for LIKE/ILIKE. 
Enable this after fixing the issue +# see issue: https://github.com/apache/datafusion/issues/12637 +# Test pattern with wildcard characters +#query TTBBBB +#select ascii_1, unicode_1, +# ascii_1 like 'An%' as ascii_like, +# unicode_1 like '%ion数据%' as unicode_like, +# ascii_1 ilike 'An%' as ascii_ilike, +# unicode_1 ilike '%ion数据%' as unicode_ilik +#from test_basic_operator; +#---- +#Andrew datafusion📊🔥 true false true false +#Xiangpeng datafusion数据融合 false true false true +#Raphael datafusionДатаФусион false false false false +#NULL NULL NULL NULL NULL NULL + +# Test pattern without wildcard characters +query TTBBBB +select ascii_1, unicode_1, + ascii_1 like 'An' as ascii_like, + unicode_1 like 'ion数据' as unicode_like, + ascii_1 ilike 'An' as ascii_ilike, + unicode_1 ilike 'ion数据' as unicode_ilik +from test_basic_operator; +---- +Andrew datafusion📊🔥 false false false false +Xiangpeng datafusion数据融合 false false false false +Raphael datafusionДатаФусион false false false false +NULL NULL NULL NULL NULL NULL + +# -------------------------------------- +# Test CHARACTER_LENGTH +# -------------------------------------- + +query II +SELECT + CHARACTER_LENGTH(ascii_1), + CHARACTER_LENGTH(unicode_1) +FROM + test_basic_operator +---- +6 12 +9 14 +7 20 +NULL NULL + +# -------------------------------------- +# Test STARTS_WITH +# -------------------------------------- + +query BBBB +SELECT + STARTS_WITH(ascii_1, 'And'), + STARTS_WITH(unicode_1, 'data'), + STARTS_WITH(ascii_1, NULL), + STARTS_WITH(unicode_1, NULL) +FROM test_basic_operator; +---- +true true NULL NULL +false true NULL NULL +false true NULL NULL +NULL NULL NULL NULL + +# -------------------------------------- +# Test ENDS_WITH +# -------------------------------------- + +query BBBB +SELECT + ENDS_WITH(ascii_1, 'w'), + ENDS_WITH(unicode_1, 'ион'), + ENDS_WITH(ascii_1, NULL), + ENDS_WITH(unicode_1, NULL) +FROM test_basic_operator; +---- +true false NULL NULL +false false NULL NULL +false true NULL NULL +NULL NULL 
NULL NULL + +# -------------------------------------- +# Test LEVENSHTEIN +# -------------------------------------- + +query IIII +SELECT + LEVENSHTEIN(ascii_1, 'Andrew'), + LEVENSHTEIN(unicode_1, 'datafusion数据融合'), + LEVENSHTEIN(ascii_1, NULL), + LEVENSHTEIN(unicode_1, NULL) +FROM test_basic_operator; +---- +0 4 NULL NULL +7 0 NULL NULL +6 10 NULL NULL +NULL NULL NULL NULL + +# -------------------------------------- +# Test LPAD +# -------------------------------------- + +query TTTT +SELECT + LPAD(ascii_1, 20, 'x'), + LPAD(ascii_1, 20, NULL), + LPAD(unicode_1, 20, '🔥'), + LPAD(unicode_1, 20, NULL) +FROM test_basic_operator; +---- +xxxxxxxxxxxxxxAndrew NULL 🔥🔥🔥🔥🔥🔥🔥🔥datafusion📊🔥 NULL +xxxxxxxxxxxXiangpeng NULL 🔥🔥🔥🔥🔥🔥datafusion数据融合 NULL +xxxxxxxxxxxxxRaphael NULL datafusionДатаФусион NULL +NULL NULL NULL NULL + +query TT +SELECT + LPAD(ascii_1, 20), + LPAD(unicode_1, 20) +FROM test_basic_operator; +---- + Andrew datafusion📊🔥 + Xiangpeng datafusion数据融合 + Raphael datafusionДатаФусион +NULL NULL + +# -------------------------------------- +# Test RPAD +# -------------------------------------- + +query TTTT +SELECT + RPAD(ascii_1, 20, 'x'), + RPAD(ascii_1, 20, NULL), + RPAD(unicode_1, 20, '🔥'), + RPAD(unicode_1, 20, NULL) +FROM test_basic_operator; +---- +Andrewxxxxxxxxxxxxxx NULL datafusion📊🔥🔥🔥🔥🔥🔥🔥🔥🔥 NULL +Xiangpengxxxxxxxxxxx NULL datafusion数据融合🔥🔥🔥🔥🔥🔥 NULL +Raphaelxxxxxxxxxxxxx NULL datafusionДатаФусион NULL +NULL NULL NULL NULL + +query TT +SELECT + RPAD(ascii_1, 20), + RPAD(unicode_1, 20) +FROM test_basic_operator; +---- +Andrew datafusion📊🔥 +Xiangpeng datafusion数据融合 +Raphael datafusionДатаФусион +NULL NULL + +# -------------------------------------- +# Test REGEXP_LIKE +# -------------------------------------- + +# TODO: LargeString does not support REGEXP_LIKE. 
Enable this after fixing the issue +# see issue: https://github.com/apache/datafusion/issues/12664 +#query BBBB +#SELECT +# REGEXP_LIKE(ascii_1, 'an'), +# REGEXP_LIKE(unicode_1, 'таФ'), +# REGEXP_LIKE(ascii_1, NULL), +# REGEXP_LIKE(unicode_1, NULL) +#FROM test_basic_operator; +#---- +#false false NULL NULL +#true false NULL NULL +#false true NULL NULL +#NULL NULL NULL NULL + +# -------------------------------------- +# Test REGEXP_MATCH +# -------------------------------------- + +# TODO: LargeString does not support REGEXP_MATCH. Enable this after fixing the issue +# see issue: https://github.com/apache/datafusion/issues/12664 +#query ???? +#SELECT +# REGEXP_MATCH(ascii_1, 'an'), +# REGEXP_MATCH(unicode_1, 'таФ'), +# REGEXP_MATCH(ascii_1, NULL), +# REGEXP_MATCH(unicode_1, NULL) +#FROM test_basic_operator; +#---- +#NULL NULL NULL NULL +#[an] NULL NULL NULL +#NULL [таФ] NULL NULL +#NULL NULL NULL NULL + +# -------------------------------------- +# Test REPEAT +# -------------------------------------- + +query TT +SELECT + REPEAT(ascii_1, 3), + REPEAT(unicode_1, 3) +FROM test_basic_operator; +---- +AndrewAndrewAndrew datafusion📊🔥datafusion📊🔥datafusion📊🔥 +XiangpengXiangpengXiangpeng datafusion数据融合datafusion数据融合datafusion数据融合 +RaphaelRaphaelRaphael datafusionДатаФусионdatafusionДатаФусионdatafusionДатаФусион +NULL NULL + +# -------------------------------------- +# Test SPLIT_PART +# -------------------------------------- + +query TTTTTT +SELECT + SPLIT_PART(ascii_1, 'e', 1), + SPLIT_PART(ascii_1, 'e', 2), + SPLIT_PART(ascii_1, NULL, 1), + SPLIT_PART(unicode_1, 'и', 1), + SPLIT_PART(unicode_1, 'и', 2), + SPLIT_PART(unicode_1, NULL, 1) +FROM test_basic_operator; +---- +Andr w NULL datafusion📊🔥 (empty) NULL +Xiangp ng NULL datafusion数据融合 (empty) NULL +Rapha l NULL datafusionДатаФус он NULL +NULL NULL NULL NULL NULL NULL + +# -------------------------------------- +# Test REVERSE +# -------------------------------------- + +query TT +SELECT + REVERSE(ascii_1), + 
REVERSE(unicode_1) +FROM test_basic_operator; +---- +werdnA 🔥📊noisufatad +gnepgnaiX 合融据数noisufatad +leahpaR ноисуФатаДnoisufatad +NULL NULL + +# -------------------------------------- +# Test STRPOS +# -------------------------------------- + +query IIIIII +SELECT + STRPOS(ascii_1, 'e'), + STRPOS(ascii_1, 'ang'), + STRPOS(ascii_1, NULL), + STRPOS(unicode_1, 'и'), + STRPOS(unicode_1, 'ион'), + STRPOS(unicode_1, NULL) +FROM test_basic_operator; +---- +5 0 NULL 0 0 NULL +7 3 NULL 0 0 NULL +6 0 NULL 18 18 NULL +NULL NULL NULL NULL NULL NULL + +# -------------------------------------- +# Test SUBSTR_INDEX +# -------------------------------------- + +query TTTTTT +SELECT + SUBSTR_INDEX(ascii_1, 'e', 1), + SUBSTR_INDEX(ascii_1, 'ang', 1), + SUBSTR_INDEX(ascii_1, NULL, 1), + SUBSTR_INDEX(unicode_1, 'и', 1), + SUBSTR_INDEX(unicode_1, '据融', 1), + SUBSTR_INDEX(unicode_1, NULL, 1) +FROM test_basic_operator; +---- +Andr Andrew NULL datafusion📊🔥 datafusion📊🔥 NULL +Xiangp Xi NULL datafusion数据融合 datafusion数 NULL +Rapha Raphael NULL datafusionДатаФус datafusionДатаФусион NULL +NULL NULL NULL NULL NULL NULL From ab9489a96f2ae07862f3cfaa406e43bc1bfab0e6 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sat, 28 Sep 2024 23:12:27 +0800 Subject: [PATCH 2/4] move literal test to string_literal.slt --- .../sqllogictest/test_files/functions.slt | 678 ------------------ .../test_files/string/string_literal.slt | 649 +++++++++++++++++ 2 files changed, 649 insertions(+), 678 deletions(-) diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index e887b1934e04..7d41c26ba012 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -18,46 +18,6 @@ # unicode expressions -query I -SELECT char_length('') ----- -0 - -query I -SELECT char_length('chars') ----- -5 - -query I -SELECT char_length('josé') ----- -4 - -query I -SELECT char_length(NULL) ----- -NULL - -query I -SELECT 
character_length('') ----- -0 - -query I -SELECT character_length('chars') ----- -5 - -query I -SELECT character_length('josé') ----- -4 - -query I -SELECT character_length(NULL) ----- -NULL - query T SELECT left('abcde', -2) ---- @@ -133,152 +93,6 @@ SELECT length(NULL) ---- NULL -query T -SELECT lpad('hi', -1, 'xy') ----- -(empty) - -query T -SELECT lpad('hi', 5, 'xy') ----- -xyxhi - -query T -SELECT lpad('hi', -1) ----- -(empty) - -query T -SELECT lpad('hi', 0) ----- -(empty) - -query T -SELECT lpad('hi', 21, 'abcdef') ----- -abcdefabcdefabcdefahi - -query T -SELECT lpad('hi', 5, 'xy') ----- -xyxhi - -query T -SELECT lpad('hi', 5, NULL) ----- -NULL - -query T -SELECT lpad('hi', 5) ----- - hi - -query T -SELECT lpad(arrow_cast('hi', 'Dictionary(Int32, Utf8)'), 5) ----- - hi - -query T -SELECT lpad('hi', CAST(NULL AS INT), 'xy') ----- -NULL - -query T -SELECT lpad('hi', CAST(NULL AS INT)) ----- -NULL - -query T -SELECT lpad('xyxhi', 3) ----- -xyx - -query T -SELECT lpad(NULL, 0) ----- -NULL - -query T -SELECT lpad(NULL, 5, 'xy') ----- -NULL - -# test largeutf8, utf8view for lpad -query T -SELECT lpad(arrow_cast('hi', 'LargeUtf8'), 5, 'xy') ----- -xyxhi - -query T -SELECT lpad(arrow_cast('hi', 'Utf8View'), 5, 'xy') ----- -xyxhi - -query T -SELECT lpad(arrow_cast('hi', 'LargeUtf8'), 5, arrow_cast('xy', 'LargeUtf8')) ----- -xyxhi - -query T -SELECT lpad(arrow_cast('hi', 'Utf8View'), 5, arrow_cast('xy', 'Utf8View')) ----- -xyxhi - -query T -SELECT lpad(arrow_cast(NULL, 'Utf8View'), 5, 'xy') ----- -NULL - -query T -SELECT reverse('abcde') ----- -edcba - -query T -SELECT reverse(arrow_cast('abcde', 'LargeUtf8')) ----- -edcba - -query T -SELECT reverse(arrow_cast('abcde', 'Utf8View')) ----- -edcba - -query T -SELECT reverse(arrow_cast('abcde', 'Dictionary(Int32, Utf8)')) ----- -edcba - -query T -SELECT reverse('loẅks') ----- -sk̈wol - -query T -SELECT reverse(arrow_cast('loẅks', 'LargeUtf8')) ----- -sk̈wol - -query T -SELECT reverse(arrow_cast('loẅks', 'Utf8View')) ----- 
-sk̈wol - -query T -SELECT reverse(NULL) ----- -NULL - -query T -SELECT reverse(arrow_cast(NULL, 'LargeUtf8')) ----- -NULL - -query T -SELECT reverse(arrow_cast(NULL, 'Utf8View')) ----- -NULL - query T SELECT right('abcde', -2) ---- @@ -324,124 +138,6 @@ SELECT right(NULL, CAST(NULL AS INT)) ---- NULL - -query T -SELECT rpad('hi', -1, 'xy') ----- -(empty) - -query T -SELECT rpad('hi', 5, 'xy') ----- -hixyx - -query T -SELECT rpad('hi', -1) ----- -(empty) - -query T -SELECT rpad('hi', 0) ----- -(empty) - -query T -SELECT rpad('hi', 21, 'abcdef') ----- -hiabcdefabcdefabcdefa - -query T -SELECT rpad('hi', 5, 'xy') ----- -hixyx - -query T -SELECT rpad(arrow_cast('hi', 'Dictionary(Int32, Utf8)'), 5, 'xy') ----- -hixyx - -query T -SELECT rpad('hi', 5, NULL) ----- -NULL - -query T -SELECT rpad('hi', 5) ----- -hi - -query T -SELECT rpad('hi', CAST(NULL AS INT), 'xy') ----- -NULL - -query T -SELECT rpad('hi', CAST(NULL AS INT)) ----- -NULL - -query T -SELECT rpad('xyxhi', 3) ----- -xyx - -# test for rpad with largeutf8 and utf8View - -query T -SELECT rpad(arrow_cast('hi', 'LargeUtf8'), 5, 'xy') ----- -hixyx - -query T -SELECT rpad(arrow_cast('hi', 'Utf8View'), 5, 'xy') ----- -hixyx - -query T -SELECT rpad(arrow_cast('hi', 'LargeUtf8'), 5, arrow_cast('xy', 'LargeUtf8')) ----- -hixyx - -query T -SELECT rpad(arrow_cast('hi', 'Utf8View'), 5, arrow_cast('xy', 'Utf8View')) ----- -hixyx - -query T -SELECT rpad(arrow_cast(NULL, 'Utf8View'), 5, 'xy') ----- -NULL - -query I -SELECT strpos('abc', 'c') ----- -3 - -query I -SELECT strpos('josé', 'é') ----- -4 - -query I -SELECT strpos('joséésoj', 'so') ----- -6 - -query I -SELECT strpos('joséésoj', 'abc') ----- -0 - -query I -SELECT strpos(NULL, 'abc') ----- -NULL - -query I -SELECT strpos('joséésoj', NULL) ----- -NULL - query T SELECT substr('alphabet', -3) ---- @@ -796,45 +492,6 @@ SELECT md5(arrow_cast('foo', 'Dictionary(Int32, Utf8)')) ---- acbd18db4cc2f85cedef654fccc4a4d8 -query T -SELECT regexp_replace('foobar', 'bar', 'xx', 'gi') 
----- -fooxx - -query T -SELECT regexp_replace(arrow_cast('foobar', 'Dictionary(Int32, Utf8)'), 'bar', 'xx', 'gi') ----- -fooxx - -query T -SELECT repeat('foo', 3) ----- -foofoofoo - -query T -SELECT repeat(arrow_cast('foo', 'Dictionary(Int32, Utf8)'), 3) ----- -foofoofoo - -query T -SELECT replace('foobar', 'bar', 'hello') ----- -foohello - -query T -SELECT replace(arrow_cast('foobar', 'Dictionary(Int32, Utf8)'), 'bar', 'hello') ----- -foohello - -query T -SELECT replace(arrow_cast('foobar', 'Utf8View'), arrow_cast('bar', 'Utf8View'), arrow_cast('hello', 'Utf8View')) ----- -foohello - -query T -SELECT replace(arrow_cast('foobar', 'LargeUtf8'), arrow_cast('bar', 'LargeUtf8'), arrow_cast('hello', 'LargeUtf8')) ----- -foohello query T SELECT rtrim(' foo ') @@ -846,68 +503,6 @@ SELECT rtrim(arrow_cast(' foo ', 'Dictionary(Int32, Utf8)')) ---- foo -query T -SELECT split_part('foo_bar', '_', 2) ----- -bar - -query T -SELECT split_part(arrow_cast('foo_bar', 'Dictionary(Int32, Utf8)'), '_', 2) ----- -bar - -# test largeutf8, utf8view for split_part -query T -SELECT split_part(arrow_cast('large_apple_large_orange_large_banana', 'LargeUtf8'), '_', 3) ----- -large - -query T -SELECT split_part(arrow_cast('view_apple_view_orange_view_banana', 'Utf8View'), '_', 3); ----- -view - -query T -SELECT split_part('test_large_split_large_case', arrow_cast('_large', 'LargeUtf8'), 2) ----- -_split - -query T -SELECT split_part(arrow_cast('huge_large_apple_large_orange_large_banana', 'LargeUtf8'), arrow_cast('_', 'Utf8View'), 2) ----- -large - -query T -SELECT split_part(arrow_cast('view_apple_view_large_banana', 'Utf8View'), arrow_cast('_large', 'LargeUtf8'), 2) ----- -_banana - -query T -SELECT split_part(NULL, '_', 2) ----- -NULL - - -query B -SELECT starts_with('foobar', 'foo') ----- -true - -query B -SELECT starts_with('foobar', 'bar') ----- -false - -query B -SELECT ends_with('foobar', 'bar') ----- -true - -query B -SELECT ends_with('foobar', 'foo') ----- -false - query T SELECT 
trim(' foo ') ---- @@ -1064,279 +659,6 @@ NULL Thomxas NULL -query I -SELECT levenshtein('kitten', 'sitting') ----- -3 - -query I -SELECT levenshtein('kitten', NULL) ----- -NULL - -query I -SELECT levenshtein(NULL, 'sitting') ----- -NULL - -query I -SELECT levenshtein(NULL, NULL) ----- -NULL - -# Test substring_index using '.' as delimiter -# This query is compatible with MySQL(8.0.19 or later), convenient for comparing results -query TIT -SELECT str, n, substring_index(str, '.', n) AS c FROM - (VALUES - ROW('arrow.apache.org'), - ROW('.'), - ROW('...'), - ROW(NULL) - ) AS strings(str), - (VALUES - ROW(1), - ROW(2), - ROW(3), - ROW(100), - ROW(-1), - ROW(-2), - ROW(-3), - ROW(-100) - ) AS occurrences(n) -ORDER BY str DESC, n; ----- -NULL -100 NULL -NULL -3 NULL -NULL -2 NULL -NULL -1 NULL -NULL 1 NULL -NULL 2 NULL -NULL 3 NULL -NULL 100 NULL -arrow.apache.org -100 arrow.apache.org -arrow.apache.org -3 arrow.apache.org -arrow.apache.org -2 apache.org -arrow.apache.org -1 org -arrow.apache.org 1 arrow -arrow.apache.org 2 arrow.apache -arrow.apache.org 3 arrow.apache.org -arrow.apache.org 100 arrow.apache.org -... -100 ... -... -3 .. -... -2 . -... -1 (empty) -... 1 (empty) -... 2 . -... 3 .. -... 100 ... -. -100 . -. -3 . -. -2 . -. -1 (empty) -. 1 (empty) -. 2 . -. 3 . -. 100 . - -query I -SELECT levenshtein(NULL, NULL) ----- -NULL - -# Test substring_index using '.' 
as delimiter with utf8view -query TIT -SELECT str, n, substring_index(arrow_cast(str, 'Utf8View'), '.', n) AS c FROM - (VALUES - ROW('arrow.apache.org'), - ROW('.'), - ROW('...'), - ROW(NULL) - ) AS strings(str), - (VALUES - ROW(1), - ROW(2), - ROW(3), - ROW(100), - ROW(-1), - ROW(-2), - ROW(-3), - ROW(-100) - ) AS occurrences(n) -ORDER BY str DESC, n; ----- -NULL -100 NULL -NULL -3 NULL -NULL -2 NULL -NULL -1 NULL -NULL 1 NULL -NULL 2 NULL -NULL 3 NULL -NULL 100 NULL -arrow.apache.org -100 arrow.apache.org -arrow.apache.org -3 arrow.apache.org -arrow.apache.org -2 apache.org -arrow.apache.org -1 org -arrow.apache.org 1 arrow -arrow.apache.org 2 arrow.apache -arrow.apache.org 3 arrow.apache.org -arrow.apache.org 100 arrow.apache.org -... -100 ... -... -3 .. -... -2 . -... -1 (empty) -... 1 (empty) -... 2 . -... 3 .. -... 100 ... -. -100 . -. -3 . -. -2 . -. -1 (empty) -. 1 (empty) -. 2 . -. 3 . -. 100 . - -# Test substring_index using 'ac' as delimiter -query TIT -SELECT str, n, substring_index(str, 'ac', n) AS c FROM - (VALUES - -- input string does not contain the delimiter - ROW('arrow'), - -- input string contains the delimiter - ROW('arrow.apache.org') - ) AS strings(str), - (VALUES - ROW(1), - ROW(2), - ROW(-1), - ROW(-2) - ) AS occurrences(n) -ORDER BY str DESC, n; ----- -arrow.apache.org -2 arrow.apache.org -arrow.apache.org -1 he.org -arrow.apache.org 1 arrow.ap -arrow.apache.org 2 arrow.apache.org -arrow -2 arrow -arrow -1 arrow -arrow 1 arrow -arrow 2 arrow - -# Test substring_index with NULL values -query TTTT -SELECT - substring_index(NULL, '.', 1), - substring_index('arrow.apache.org', NULL, 1), - substring_index('arrow.apache.org', '.', NULL), - substring_index(NULL, NULL, NULL) ----- -NULL NULL NULL NULL - -# Test substring_index with empty strings -query TT -SELECT - -- input string is empty - substring_index('', '.', 1), - -- delimiter is empty - substring_index('arrow.apache.org', '', 1) ----- -(empty) (empty) - -# Test substring_index with 0 
occurrence -query T -SELECT substring_index('arrow.apache.org', 'ac', 0) ----- -(empty) - -# Test substring_index with large occurrences -query TT -SELECT - -- i64::MIN - substring_index('arrow.apache.org', '.', -9223372036854775808) as c1, - -- i64::MAX - substring_index('arrow.apache.org', '.', 9223372036854775807) as c2; ----- -arrow.apache.org arrow.apache.org - -# Test substring_index issue https://github.com/apache/datafusion/issues/9472 -query TTT -SELECT - url, - substring_index(url, '.', 1) AS subdomain, - substring_index(url, '.', -1) AS tld -FROM - (VALUES ROW('docs.apache.com'), - ROW('community.influxdata.com'), - ROW('arrow.apache.org') - ) data(url) ----- -docs.apache.com docs com -community.influxdata.com community com -arrow.apache.org arrow org - -# find_in_set tests -query I -SELECT find_in_set('b', 'a,b,c,d') ----- -2 - - -query I -SELECT find_in_set('a', 'a,b,c,d,a') ----- -1 - -query I -SELECT find_in_set('', 'a,b,c,d,a') ----- -0 - -query I -SELECT find_in_set('a', '') ----- -0 - - -query I -SELECT find_in_set('', '') ----- -1 - -query I -SELECT find_in_set(NULL, 'a,b,c,d') ----- -NULL - -query I -SELECT find_in_set('a', NULL) ----- -NULL - - -query I -SELECT find_in_set(NULL, NULL) ----- -NULL - -# find_in_set tests with utf8view -query I -SELECT find_in_set(arrow_cast('b', 'Utf8View'), 'a,b,c,d') ----- -2 - - -query I -SELECT find_in_set('a', arrow_cast('a,b,c,d,a', 'Utf8View')) ----- -1 - -query I -SELECT find_in_set(arrow_cast('', 'Utf8View'), arrow_cast('a,b,c,d,a', 'Utf8View')) ----- -0 - # Verify that multiple calls to volatile functions like `random()` are not combined / optimized away query B SELECT r FROM (SELECT r1 == r2 r, r1, r2 FROM (SELECT random()+1 r1, random()+1 r2) WHERE r1 > 0 AND r2 > 0) diff --git a/datafusion/sqllogictest/test_files/string/string_literal.slt b/datafusion/sqllogictest/test_files/string/string_literal.slt index 24e03fdb7184..5d847747693d 100644 --- 
a/datafusion/sqllogictest/test_files/string/string_literal.slt +++ b/datafusion/sqllogictest/test_files/string/string_literal.slt @@ -167,3 +167,652 @@ query D select make_date(arrow_cast('2024', 'Utf8View'), arrow_cast('01', 'Utf8View'), arrow_cast('23', 'Utf8View')) ---- 2024-01-23 + +query I +SELECT character_length('') +---- +0 + +query I +SELECT character_length('chars') +---- +5 + +query I +SELECT character_length('josé') +---- +4 + +query I +SELECT character_length(NULL) +---- +NULL + +query B +SELECT ends_with('foobar', 'bar') +---- +true + +query B +SELECT ends_with('foobar', 'foo') +---- +false + +query I +SELECT levenshtein('kitten', 'sitting') +---- +3 + +query I +SELECT levenshtein('kitten', NULL) +---- +NULL + +query I +SELECT levenshtein(NULL, 'sitting') +---- +NULL + +query I +SELECT levenshtein(NULL, NULL) +---- +NULL + + +query T +SELECT lpad('hi', -1, 'xy') +---- +(empty) + +query T +SELECT lpad('hi', 5, 'xy') +---- +xyxhi + +query T +SELECT lpad('hi', -1) +---- +(empty) + +query T +SELECT lpad('hi', 0) +---- +(empty) + +query T +SELECT lpad('hi', 21, 'abcdef') +---- +abcdefabcdefabcdefahi + +query T +SELECT lpad('hi', 5, 'xy') +---- +xyxhi + +query T +SELECT lpad('hi', 5, NULL) +---- +NULL + +query T +SELECT lpad('hi', 5) +---- + hi + +query T +SELECT lpad('hi', CAST(NULL AS INT), 'xy') +---- +NULL + +query T +SELECT lpad('hi', CAST(NULL AS INT)) +---- +NULL + +query T +SELECT lpad('xyxhi', 3) +---- +xyx + +query T +SELECT lpad(NULL, 0) +---- +NULL + +query T +SELECT lpad(NULL, 5, 'xy') +---- +NULL + +query T +SELECT regexp_replace('foobar', 'bar', 'xx', 'gi') +---- +fooxx + +query T +SELECT regexp_replace(arrow_cast('foobar', 'Dictionary(Int32, Utf8)'), 'bar', 'xx', 'gi') +---- +fooxx + +query T +SELECT repeat('foo', 3) +---- +foofoofoo + +query T +SELECT repeat(arrow_cast('foo', 'Dictionary(Int32, Utf8)'), 3) +---- +foofoofoo + + +query T +SELECT replace('foobar', 'bar', 'hello') +---- +foohello + +query T +SELECT replace(arrow_cast('foobar', 
'Dictionary(Int32, Utf8)'), 'bar', 'hello') +---- +foohello + +query T +SELECT replace(arrow_cast('foobar', 'Utf8View'), arrow_cast('bar', 'Utf8View'), arrow_cast('hello', 'Utf8View')) +---- +foohello + +query T +SELECT replace(arrow_cast('foobar', 'LargeUtf8'), arrow_cast('bar', 'LargeUtf8'), arrow_cast('hello', 'LargeUtf8')) +---- +foohello + + +query T +SELECT reverse('abcde') +---- +edcba + +query T +SELECT reverse(arrow_cast('abcde', 'LargeUtf8')) +---- +edcba + +query T +SELECT reverse(arrow_cast('abcde', 'Utf8View')) +---- +edcba + +query T +SELECT reverse(arrow_cast('abcde', 'Dictionary(Int32, Utf8)')) +---- +edcba + +query T +SELECT reverse('loẅks') +---- +sk̈wol + +query T +SELECT reverse(arrow_cast('loẅks', 'LargeUtf8')) +---- +sk̈wol + +query T +SELECT reverse(arrow_cast('loẅks', 'Utf8View')) +---- +sk̈wol + +query T +SELECT reverse(NULL) +---- +NULL + +query T +SELECT reverse(arrow_cast(NULL, 'LargeUtf8')) +---- +NULL + +query T +SELECT reverse(arrow_cast(NULL, 'Utf8View')) +---- +NULL + + +query I +SELECT strpos('abc', 'c') +---- +3 + +query I +SELECT strpos('josé', 'é') +---- +4 + +query I +SELECT strpos('joséésoj', 'so') +---- +6 + +query I +SELECT strpos('joséésoj', 'abc') +---- +0 + +query I +SELECT strpos(NULL, 'abc') +---- +NULL + +query I +SELECT strpos('joséésoj', NULL) +---- +NULL + + + +query T +SELECT rpad('hi', -1, 'xy') +---- +(empty) + +query T +SELECT rpad('hi', 5, 'xy') +---- +hixyx + +query T +SELECT rpad('hi', -1) +---- +(empty) + +query T +SELECT rpad('hi', 0) +---- +(empty) + +query T +SELECT rpad('hi', 21, 'abcdef') +---- +hiabcdefabcdefabcdefa + +query T +SELECT rpad('hi', 5, 'xy') +---- +hixyx + +query T +SELECT rpad(arrow_cast('hi', 'Dictionary(Int32, Utf8)'), 5, 'xy') +---- +hixyx + +query T +SELECT rpad('hi', 5, NULL) +---- +NULL + +query T +SELECT rpad('hi', 5) +---- +hi + +query T +SELECT rpad('hi', CAST(NULL AS INT), 'xy') +---- +NULL + +query T +SELECT rpad('hi', CAST(NULL AS INT)) +---- +NULL + +query T +SELECT 
rpad('xyxhi', 3) +---- +xyx + +# test for rpad with largeutf8 and utf8View + +query T +SELECT rpad(arrow_cast('hi', 'LargeUtf8'), 5, 'xy') +---- +hixyx + +query T +SELECT rpad(arrow_cast('hi', 'Utf8View'), 5, 'xy') +---- +hixyx + +query T +SELECT rpad(arrow_cast('hi', 'LargeUtf8'), 5, arrow_cast('xy', 'LargeUtf8')) +---- +hixyx + +query T +SELECT rpad(arrow_cast('hi', 'Utf8View'), 5, arrow_cast('xy', 'Utf8View')) +---- +hixyx + +query T +SELECT rpad(arrow_cast(NULL, 'Utf8View'), 5, 'xy') +---- +NULL + +query I +SELECT char_length('') +---- +0 + +query I +SELECT char_length('chars') +---- +5 + +query I +SELECT char_length('josé') +---- +4 + +query I +SELECT char_length(NULL) +---- +NULL + +# Test substring_index using '.' as delimiter +# This query is compatible with MySQL(8.0.19 or later), convenient for comparing results +query TIT +SELECT str, n, substring_index(str, '.', n) AS c FROM + (VALUES + ROW('arrow.apache.org'), + ROW('.'), + ROW('...'), + ROW(NULL) + ) AS strings(str), + (VALUES + ROW(1), + ROW(2), + ROW(3), + ROW(100), + ROW(-1), + ROW(-2), + ROW(-3), + ROW(-100) + ) AS occurrences(n) +ORDER BY str DESC, n; +---- +NULL -100 NULL +NULL -3 NULL +NULL -2 NULL +NULL -1 NULL +NULL 1 NULL +NULL 2 NULL +NULL 3 NULL +NULL 100 NULL +arrow.apache.org -100 arrow.apache.org +arrow.apache.org -3 arrow.apache.org +arrow.apache.org -2 apache.org +arrow.apache.org -1 org +arrow.apache.org 1 arrow +arrow.apache.org 2 arrow.apache +arrow.apache.org 3 arrow.apache.org +arrow.apache.org 100 arrow.apache.org +... -100 ... +... -3 .. +... -2 . +... -1 (empty) +... 1 (empty) +... 2 . +... 3 .. +... 100 ... +. -100 . +. -3 . +. -2 . +. -1 (empty) +. 1 (empty) +. 2 . +. 3 . +. 100 . + +# Test substring_index using '.' 
as delimiter with utf8view +query TIT +SELECT str, n, substring_index(arrow_cast(str, 'Utf8View'), '.', n) AS c FROM + (VALUES + ROW('arrow.apache.org'), + ROW('.'), + ROW('...'), + ROW(NULL) + ) AS strings(str), + (VALUES + ROW(1), + ROW(2), + ROW(3), + ROW(100), + ROW(-1), + ROW(-2), + ROW(-3), + ROW(-100) + ) AS occurrences(n) +ORDER BY str DESC, n; +---- +NULL -100 NULL +NULL -3 NULL +NULL -2 NULL +NULL -1 NULL +NULL 1 NULL +NULL 2 NULL +NULL 3 NULL +NULL 100 NULL +arrow.apache.org -100 arrow.apache.org +arrow.apache.org -3 arrow.apache.org +arrow.apache.org -2 apache.org +arrow.apache.org -1 org +arrow.apache.org 1 arrow +arrow.apache.org 2 arrow.apache +arrow.apache.org 3 arrow.apache.org +arrow.apache.org 100 arrow.apache.org +... -100 ... +... -3 .. +... -2 . +... -1 (empty) +... 1 (empty) +... 2 . +... 3 .. +... 100 ... +. -100 . +. -3 . +. -2 . +. -1 (empty) +. 1 (empty) +. 2 . +. 3 . +. 100 . + +# Test substring_index using 'ac' as delimiter +query TIT +SELECT str, n, substring_index(str, 'ac', n) AS c FROM + (VALUES + -- input string does not contain the delimiter + ROW('arrow'), + -- input string contains the delimiter + ROW('arrow.apache.org') + ) AS strings(str), + (VALUES + ROW(1), + ROW(2), + ROW(-1), + ROW(-2) + ) AS occurrences(n) +ORDER BY str DESC, n; +---- +arrow.apache.org -2 arrow.apache.org +arrow.apache.org -1 he.org +arrow.apache.org 1 arrow.ap +arrow.apache.org 2 arrow.apache.org +arrow -2 arrow +arrow -1 arrow +arrow 1 arrow +arrow 2 arrow + +# Test substring_index with NULL values +query TTTT +SELECT + substring_index(NULL, '.', 1), + substring_index('arrow.apache.org', NULL, 1), + substring_index('arrow.apache.org', '.', NULL), + substring_index(NULL, NULL, NULL) +---- +NULL NULL NULL NULL + +# Test substring_index with empty strings +query TT +SELECT + -- input string is empty + substring_index('', '.', 1), + -- delimiter is empty + substring_index('arrow.apache.org', '', 1) +---- +(empty) (empty) + +# Test substring_index with 0 
occurrence +query T +SELECT substring_index('arrow.apache.org', 'ac', 0) +---- +(empty) + +# Test substring_index with large occurrences +query TT +SELECT + -- i64::MIN + substring_index('arrow.apache.org', '.', -9223372036854775808) as c1, + -- i64::MAX + substring_index('arrow.apache.org', '.', 9223372036854775807) as c2; +---- +arrow.apache.org arrow.apache.org + +# Test substring_index issue https://github.com/apache/datafusion/issues/9472 +query TTT +SELECT + url, + substring_index(url, '.', 1) AS subdomain, + substring_index(url, '.', -1) AS tld +FROM + (VALUES ROW('docs.apache.com'), + ROW('community.influxdata.com'), + ROW('arrow.apache.org') + ) data(url) +---- +docs.apache.com docs com +community.influxdata.com community com +arrow.apache.org arrow org + + +# find_in_set tests +query I +SELECT find_in_set('b', 'a,b,c,d') +---- +2 + + +query I +SELECT find_in_set('a', 'a,b,c,d,a') +---- +1 + +query I +SELECT find_in_set('', 'a,b,c,d,a') +---- +0 + +query I +SELECT find_in_set('a', '') +---- +0 + + +query I +SELECT find_in_set('', '') +---- +1 + +query I +SELECT find_in_set(NULL, 'a,b,c,d') +---- +NULL + +query I +SELECT find_in_set('a', NULL) +---- +NULL + + +query I +SELECT find_in_set(NULL, NULL) +---- +NULL + +# find_in_set tests with utf8view +query I +SELECT find_in_set(arrow_cast('b', 'Utf8View'), 'a,b,c,d') +---- +2 + + +query I +SELECT find_in_set('a', arrow_cast('a,b,c,d,a', 'Utf8View')) +---- +1 + +query I +SELECT find_in_set(arrow_cast('', 'Utf8View'), arrow_cast('a,b,c,d,a', 'Utf8View')) +---- +0 + + +query T +SELECT split_part('foo_bar', '_', 2) +---- +bar + +query T +SELECT split_part(arrow_cast('foo_bar', 'Dictionary(Int32, Utf8)'), '_', 2) +---- +bar + +# test largeutf8, utf8view for split_part +query T +SELECT split_part(arrow_cast('large_apple_large_orange_large_banana', 'LargeUtf8'), '_', 3) +---- +large + +query T +SELECT split_part(arrow_cast('view_apple_view_orange_view_banana', 'Utf8View'), '_', 3); +---- +view + +query T +SELECT 
split_part('test_large_split_large_case', arrow_cast('_large', 'LargeUtf8'), 2) +---- +_split + +query T +SELECT split_part(arrow_cast('huge_large_apple_large_orange_large_banana', 'LargeUtf8'), arrow_cast('_', 'Utf8View'), 2) +---- +large + +query T +SELECT split_part(arrow_cast('view_apple_view_large_banana', 'Utf8View'), arrow_cast('_large', 'LargeUtf8'), 2) +---- +_banana + +query T +SELECT split_part(NULL, '_', 2) +---- +NULL + +query B +SELECT starts_with('foobar', 'foo') +---- +true + +query B +SELECT starts_with('foobar', 'bar') +---- +false From c48481e8cda71354820420cde76014771186af32 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sat, 28 Sep 2024 23:15:27 +0800 Subject: [PATCH 3/4] add disable test to supported type file --- .../test_files/string/dictionary_utf8.slt | 46 +++++++++++++++++++ .../test_files/string/large_string.slt | 16 +++++++ .../sqllogictest/test_files/string/string.slt | 46 +++++++++++++++++++ .../test_files/string/string_view.slt | 30 ++++++++++++ 4 files changed, 138 insertions(+) diff --git a/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt b/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt index 9d2460816709..ea3c9b8eb6ca 100644 --- a/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt +++ b/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt @@ -37,6 +37,52 @@ select arrow_cast(col1, 'Dictionary(Int32, Utf8)') as c1 from test_substr_base; statement ok drop table test_source +# TODO: move it back to `string_query.slt.part` after fixing the issue +# see detail: https://github.com/apache/datafusion/issues/12637 +# Test pattern with wildcard characters +query TTBBBB +select ascii_1, unicode_1, + ascii_1 like 'An%' as ascii_like, + unicode_1 like '%ion数据%' as unicode_like, + ascii_1 ilike 'An%' as ascii_ilike, + unicode_1 ilike '%ion数据%' as unicode_ilik +from test_basic_operator; +---- +Andrew datafusion📊🔥 true false true false +Xiangpeng datafusion数据融合 false true false true +Raphael 
datafusionДатаФусион false false false false +NULL NULL NULL NULL NULL NULL + +# TODO: move it back to `string_query.slt.part` after fixing the issue +# see detail: https://github.com/apache/datafusion/issues/12664 +query BBBB +SELECT + REGEXP_LIKE(ascii_1, 'an'), + REGEXP_LIKE(unicode_1, 'таФ'), + REGEXP_LIKE(ascii_1, NULL), + REGEXP_LIKE(unicode_1, NULL) +FROM test_basic_operator; +---- +false false NULL NULL +true false NULL NULL +false true NULL NULL +NULL NULL NULL NULL + +# TODO: move it back to `string_query.slt.part` after fixing the issue +# see detail: https://github.com/apache/datafusion/issues/12664 +query ???? +SELECT + REGEXP_MATCH(ascii_1, 'an'), + REGEXP_MATCH(unicode_1, 'таФ'), + REGEXP_MATCH(ascii_1, NULL), + REGEXP_MATCH(unicode_1, NULL) +FROM test_basic_operator; +---- +NULL NULL NULL NULL +[an] NULL NULL NULL +NULL [таФ] NULL NULL +NULL NULL NULL NULL + # # common test for string-like functions and operators # diff --git a/datafusion/sqllogictest/test_files/string/large_string.slt b/datafusion/sqllogictest/test_files/string/large_string.slt index a2e570073ff6..af6d104e57ac 100644 --- a/datafusion/sqllogictest/test_files/string/large_string.slt +++ b/datafusion/sqllogictest/test_files/string/large_string.slt @@ -43,6 +43,22 @@ Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 Raphael R datafusionДатаФусион аФус NULL R NULL 🔥 +# TODO: move it back to `string_query.slt.part` after fixing the issue +# see detail: https://github.com/apache/datafusion/issues/12637 +# Test pattern with wildcard characters +query TTBBBB +select ascii_1, unicode_1, + ascii_1 like 'An%' as ascii_like, + unicode_1 like '%ion数据%' as unicode_like, + ascii_1 ilike 'An%' as ascii_ilike, + unicode_1 ilike '%ion数据%' as unicode_ilik +from test_basic_operator; +---- +Andrew datafusion📊🔥 true false true false +Xiangpeng datafusion数据融合 false true false true +Raphael datafusionДатаФусион false false false false +NULL NULL NULL NULL NULL NULL + # TODO: move it back to 
`string_query.slt.part` after fixing the issue # https://github.com/apache/datafusion/issues/12618 query BB diff --git a/datafusion/sqllogictest/test_files/string/string.slt b/datafusion/sqllogictest/test_files/string/string.slt index bc923d5e12c3..ad568438a470 100644 --- a/datafusion/sqllogictest/test_files/string/string.slt +++ b/datafusion/sqllogictest/test_files/string/string.slt @@ -47,6 +47,52 @@ false false false true NULL NULL +# TODO: move it back to `string_query.slt.part` after fixing the issue +# see detail: https://github.com/apache/datafusion/issues/12637 +# Test pattern with wildcard characters +query TTBBBB +select ascii_1, unicode_1, + ascii_1 like 'An%' as ascii_like, + unicode_1 like '%ion数据%' as unicode_like, + ascii_1 ilike 'An%' as ascii_ilike, + unicode_1 ilike '%ion数据%' as unicode_ilik +from test_basic_operator; +---- +Andrew datafusion📊🔥 true false true false +Xiangpeng datafusion数据融合 false true false true +Raphael datafusionДатаФусион false false false false +NULL NULL NULL NULL NULL NULL + +# TODO: move it back to `string_query.slt.part` after fixing the issue +# see detail: https://github.com/apache/datafusion/issues/12664 +query BBBB +SELECT + REGEXP_LIKE(ascii_1, 'an'), + REGEXP_LIKE(unicode_1, 'таФ'), + REGEXP_LIKE(ascii_1, NULL), + REGEXP_LIKE(unicode_1, NULL) +FROM test_basic_operator; +---- +false false NULL NULL +true false NULL NULL +false true NULL NULL +NULL NULL NULL NULL + +# TODO: move it back to `string_query.slt.part` after fixing the issue +# see detail: https://github.com/apache/datafusion/issues/12664 +query ???? 
+SELECT + REGEXP_MATCH(ascii_1, 'an'), + REGEXP_MATCH(unicode_1, 'таФ'), + REGEXP_MATCH(ascii_1, NULL), + REGEXP_MATCH(unicode_1, NULL) +FROM test_basic_operator; +---- +NULL NULL NULL NULL +[an] NULL NULL NULL +NULL [таФ] NULL NULL +NULL NULL NULL NULL + # # common test for string-like functions and operators # diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt b/datafusion/sqllogictest/test_files/string/string_view.slt index e7b55c9c1c8c..7e25db84599c 100644 --- a/datafusion/sqllogictest/test_files/string/string_view.slt +++ b/datafusion/sqllogictest/test_files/string/string_view.slt @@ -50,6 +50,36 @@ false false false true NULL NULL +# TODO: move it back to `string_query.slt.part` after fixing the issue +# see detail: https://github.com/apache/datafusion/issues/12664 +query BBBB +SELECT + REGEXP_LIKE(ascii_1, 'an'), + REGEXP_LIKE(unicode_1, 'таФ'), + REGEXP_LIKE(ascii_1, NULL), + REGEXP_LIKE(unicode_1, NULL) +FROM test_basic_operator; +---- +false false NULL NULL +true false NULL NULL +false true NULL NULL +NULL NULL NULL NULL + +# TODO: move it back to `string_query.slt.part` after fixing the issue +# see detail: https://github.com/apache/datafusion/issues/12664 +query ???? 
+SELECT + REGEXP_MATCH(ascii_1, 'an'), + REGEXP_MATCH(unicode_1, 'таФ'), + REGEXP_MATCH(ascii_1, NULL), + REGEXP_MATCH(unicode_1, NULL) +FROM test_basic_operator; +---- +NULL NULL NULL NULL +[an] NULL NULL NULL +NULL [таФ] NULL NULL +NULL NULL NULL NULL + # # common test for string-like functions and operators # From 9a79ae9eb8769d7269458ef2ee3e4afa8c55bf47 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sun, 29 Sep 2024 11:37:17 +0800 Subject: [PATCH 4/4] disable strpos test for `dictionary string` --- .../test_files/string/large_string.slt | 17 +++++++++++ .../sqllogictest/test_files/string/string.slt | 17 +++++++++++ .../test_files/string/string_query.slt.part | 30 ++++++++++--------- .../test_files/string/string_view.slt | 17 +++++++++++ 4 files changed, 67 insertions(+), 14 deletions(-) diff --git a/datafusion/sqllogictest/test_files/string/large_string.slt b/datafusion/sqllogictest/test_files/string/large_string.slt index af6d104e57ac..169c658e5ac1 100644 --- a/datafusion/sqllogictest/test_files/string/large_string.slt +++ b/datafusion/sqllogictest/test_files/string/large_string.slt @@ -72,6 +72,23 @@ false false false true NULL NULL +# TODO: move it back to `string_query.slt.part` after fixing the issue +# see detail: https://github.com/apache/datafusion/issues/12670 +query IIIIII +SELECT + STRPOS(ascii_1, 'e'), + STRPOS(ascii_1, 'ang'), + STRPOS(ascii_1, NULL), + STRPOS(unicode_1, 'и'), + STRPOS(unicode_1, 'ион'), + STRPOS(unicode_1, NULL) +FROM test_basic_operator; +---- +5 0 NULL 0 0 NULL +7 3 NULL 0 0 NULL +6 0 NULL 18 18 NULL +NULL NULL NULL NULL NULL NULL + # # common test for string-like functions and operators # diff --git a/datafusion/sqllogictest/test_files/string/string.slt b/datafusion/sqllogictest/test_files/string/string.slt index ad568438a470..6b89147c5c4f 100644 --- a/datafusion/sqllogictest/test_files/string/string.slt +++ b/datafusion/sqllogictest/test_files/string/string.slt @@ -93,6 +93,23 @@ NULL NULL NULL NULL NULL [таФ] NULL NULL 
NULL NULL NULL NULL +# TODO: move it back to `string_query.slt.part` after fixing the issue +# see detail: https://github.com/apache/datafusion/issues/12670 +query IIIIII +SELECT + STRPOS(ascii_1, 'e'), + STRPOS(ascii_1, 'ang'), + STRPOS(ascii_1, NULL), + STRPOS(unicode_1, 'и'), + STRPOS(unicode_1, 'ион'), + STRPOS(unicode_1, NULL) +FROM test_basic_operator; +---- +5 0 NULL 0 0 NULL +7 3 NULL 0 0 NULL +6 0 NULL 18 18 NULL +NULL NULL NULL NULL NULL NULL + # # common test for string-like functions and operators # diff --git a/datafusion/sqllogictest/test_files/string/string_query.slt.part b/datafusion/sqllogictest/test_files/string/string_query.slt.part index de99717e6a96..0af0a6a642b2 100644 --- a/datafusion/sqllogictest/test_files/string/string_query.slt.part +++ b/datafusion/sqllogictest/test_files/string/string_query.slt.part @@ -943,20 +943,22 @@ NULL NULL # Test STRPOS # -------------------------------------- -query IIIIII -SELECT - STRPOS(ascii_1, 'e'), - STRPOS(ascii_1, 'ang'), - STRPOS(ascii_1, NULL), - STRPOS(unicode_1, 'и'), - STRPOS(unicode_1, 'ион'), - STRPOS(unicode_1, NULL) -FROM test_basic_operator; ----- -5 0 NULL 0 0 NULL -7 3 NULL 0 0 NULL -6 0 NULL 18 18 NULL -NULL NULL NULL NULL NULL NULL +# TODO: DictionaryString does not support STRPOS. 
Enable this after fixing the issue +# see issue: https://github.com/apache/datafusion/issues/12670 +#query IIIIII +#SELECT +# STRPOS(ascii_1, 'e'), +# STRPOS(ascii_1, 'ang'), +# STRPOS(ascii_1, NULL), +# STRPOS(unicode_1, 'и'), +# STRPOS(unicode_1, 'ион'), +# STRPOS(unicode_1, NULL) +#FROM test_basic_operator; +#---- +#5 0 NULL 0 0 NULL +#7 3 NULL 0 0 NULL +#6 0 NULL 18 18 NULL +#NULL NULL NULL NULL NULL NULL # -------------------------------------- # Test SUBSTR_INDEX diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt b/datafusion/sqllogictest/test_files/string/string_view.slt index 7e25db84599c..fb82726e3a9d 100644 --- a/datafusion/sqllogictest/test_files/string/string_view.slt +++ b/datafusion/sqllogictest/test_files/string/string_view.slt @@ -80,6 +80,23 @@ NULL NULL NULL NULL NULL [таФ] NULL NULL NULL NULL NULL NULL +# TODO: move it back to `string_query.slt.part` after fixing the issue +# see detail: https://github.com/apache/datafusion/issues/12670 +query IIIIII +SELECT + STRPOS(ascii_1, 'e'), + STRPOS(ascii_1, 'ang'), + STRPOS(ascii_1, NULL), + STRPOS(unicode_1, 'и'), + STRPOS(unicode_1, 'ион'), + STRPOS(unicode_1, NULL) +FROM test_basic_operator; +---- +5 0 NULL 0 0 NULL +7 3 NULL 0 0 NULL +6 0 NULL 18 18 NULL +NULL NULL NULL NULL NULL NULL + # # common test for string-like functions and operators #