From 0517b5d1276215c461de28d84ef268e1751598ad Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Fri, 8 Nov 2024 16:15:13 +0100 Subject: [PATCH] Correctness and style fixes --- .../src/simplify_expressions/expr_simplifier.rs | 13 ++++++++----- .../sqllogictest/test_files/string/string_view.slt | 9 ++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index f2e52be1b880..0f2f5c8c6c00 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1472,6 +1472,8 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { // Rules for Like Expr::Like(like) => { + // `\` is implicit escape, see https://github.com/apache/datafusion/issues/13291 + let escape_char = like.escape_char.unwrap_or('\\'); match as_string_scalar(&like.pattern) { Some((data_type, pattern_str)) => { match pattern_str { @@ -1498,10 +1500,11 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { }) } Some(pattern_str) - if pattern_str.contains("%%") && - // TODO support more complete unescaping - (like.escape_char.is_none() || pattern_str.contains(like.escape_char.unwrap())) => + if pattern_str.contains("%%") + && !pattern_str.contains(escape_char) => { + // Repeated occurrences of wildcard are redundant so remove them + // exp LIKE '%%' --> exp LIKE '%' let simplified_pattern = Regex::new("%%+") .unwrap() .replace_all(pattern_str, "%") @@ -1515,8 +1518,8 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { })) } Some(pattern_str) - if like.escape_char.is_none() - && !pattern_str.contains(['%', '_'].as_ref()) => + if !pattern_str + .contains(['%', '_', escape_char].as_ref()) => { // If the pattern does not contain any wildcards, we can simplify the like expression to an equality expression // TODO: handle escape 
characters diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt b/datafusion/sqllogictest/test_files/string/string_view.slt index f432b3e8d77f..dec5488d7319 100644 --- a/datafusion/sqllogictest/test_files/string/string_view.slt +++ b/datafusion/sqllogictest/test_files/string/string_view.slt @@ -391,14 +391,13 @@ drop table test_lowercase ## Ensure no casts for LIKE/ILIKE query TT EXPLAIN SELECT - column1_utf8view like 'foo' as "like", - column1_utf8view ilike 'foo' as "ilike" + column1_utf8view like '%foo%' as "like", + column1_utf8view ilike '%foo%' as "ilike" FROM test; ---- logical_plan -01)Projection: __common_expr_1 AS like, __common_expr_1 AS ilike -02)--Projection: test.column1_utf8view = Utf8View("foo") AS __common_expr_1 -03)----TableScan: test projection=[column1_utf8view] +01)Projection: test.column1_utf8view LIKE Utf8View("%foo%") AS like, test.column1_utf8view ILIKE Utf8View("%foo%") AS ilike +02)--TableScan: test projection=[column1_utf8view] query TT