diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 40be1f85391d6..c5dd0c36d0683 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1476,13 +1476,28 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { negated, escape_char: _, case_insensitive: _, - }) if !is_null(&expr) - && matches!( - pattern.as_ref(), - Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) if pattern_str == "%" - ) => + }) if matches!( + pattern.as_ref(), + Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) if pattern_str == "%" + ) || matches!( + pattern.as_ref(), + Expr::Literal(ScalarValue::LargeUtf8(Some(pattern_str))) if pattern_str == "%" + ) || matches!( + pattern.as_ref(), + Expr::Literal(ScalarValue::Utf8View(Some(pattern_str))) if pattern_str == "%" + ) => { - Transformed::yes(lit(!negated)) + // exp LIKE '%' is + // - when exp is not NULL, it's true + // - when exp is NULL, it's NULL + // exp NOT LIKE '%' is + // - when exp is not NULL, it's false + // - when exp is NULL, it's NULL + Transformed::yes(Expr::Case(Case { + expr: Some(Box::new(Expr::IsNotNull(expr))), + when_then_expr: vec![(Box::new(lit(true)), Box::new(lit(!negated)))], + else_expr: None, + })) } // a is not null/unknown --> true (if a is not nullable) diff --git a/datafusion/sqllogictest/test_files/string/string.slt b/datafusion/sqllogictest/test_files/string/string.slt index 9e97712b68717..68111ba34696e 100644 --- a/datafusion/sqllogictest/test_files/string/string.slt +++ b/datafusion/sqllogictest/test_files/string/string.slt @@ -34,6 +34,14 @@ statement ok create table test_substr as select arrow_cast(col1, 'Utf8') as c1 from test_substr_base; +query BBB +SELECT + NULL LIKE '%', + '' LIKE '%', + 'a' LIKE '%' +---- +NULL true true + # TODO: move it back to `string_query.slt.part` after fixing the issue # 
see detail: https://github.com/apache/datafusion/issues/12637 # Test pattern with wildcard characters diff --git a/datafusion/sqllogictest/test_files/string/string_query.slt.part b/datafusion/sqllogictest/test_files/string/string_query.slt.part index c4975b5b8c8dc..57fb09bca9e42 100644 --- a/datafusion/sqllogictest/test_files/string/string_query.slt.part +++ b/datafusion/sqllogictest/test_files/string/string_query.slt.part @@ -873,6 +873,58 @@ NULL NULL NULL NULL NULL #Raphael datafusionДатаФусион false false false false #NULL NULL NULL NULL NULL NULL +# TODO (https://github.com/apache/datafusion/issues/12637) uncomment additional test projections +query TTBB +SELECT ascii_1, unicode_1, + ascii_1 LIKE '%' AS ascii_1_like_percent, + unicode_1 LIKE '%' AS unicode_1_like_percent + -- ascii_1 LIKE '%%' AS ascii_1_like_percent_percent, -- TODO enable after fixing https://github.com/apache/datafusion/issues/12637 + -- unicode_1 LIKE '%%' AS unicode_1_like_percent_percent -- TODO enable after fixing https://github.com/apache/datafusion/issues/12637 +FROM test_basic_operator +---- +Andrew datafusion📊🔥 true true +Xiangpeng datafusion数据融合 true true +Raphael datafusionДатаФусион true true +under_score un iść core true true +percent pan Tadeusz ma iść w kąt true true +(empty) (empty) true true +NULL NULL NULL NULL +NULL NULL NULL NULL + +# TODO (https://github.com/apache/datafusion/issues/12637) uncomment additional test projections +query TTBB +SELECT ascii_1, unicode_1, + ascii_1 NOT LIKE '%' AS ascii_1_not_like_percent, + unicode_1 NOT LIKE '%' AS unicode_1_not_like_percent + -- ascii_1 NOT LIKE '%%' AS ascii_1_not_like_percent_percent, -- TODO enable after fixing https://github.com/apache/datafusion/issues/12637 + -- unicode_1 NOT LIKE '%%' AS unicode_1_not_like_percent_percent -- TODO enable after fixing https://github.com/apache/datafusion/issues/12637 +FROM test_basic_operator +---- +Andrew datafusion📊🔥 false false +Xiangpeng datafusion数据融合 false false +Raphael 
datafusionДатаФусион false false +under_score un iść core false false +percent pan Tadeusz ma iść w kąt false false +(empty) (empty) false false +NULL NULL NULL NULL +NULL NULL NULL NULL + +query T +SELECT ascii_1 FROM test_basic_operator WHERE ascii_1 LIKE '%' +---- +Andrew +Xiangpeng +Raphael +under_score +percent +(empty) + +# NOT LIKE '%' is false for non-NULL input and NULL for NULL input, so the filter keeps no rows +# NOTE(review): confirm whether https://github.com/apache/datafusion/issues/12637 still applies to this test +query T +SELECT ascii_1 FROM test_basic_operator WHERE ascii_1 NOT LIKE '%' +---- + # Test pattern without wildcard characters query TTBBBB select ascii_1, unicode_1,