From 379bd23816b3e2d32d14316d433de4037361e2bc Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Tue, 7 Mar 2023 11:40:15 +0100 Subject: [PATCH] fix: regexp_match skips first match (#3807) --- arrow-string/src/regexp.rs | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/arrow-string/src/regexp.rs b/arrow-string/src/regexp.rs index bf6e60cfeaaa..4b1e2dcde228 100644 --- a/arrow-string/src/regexp.rs +++ b/arrow-string/src/regexp.rs @@ -182,6 +182,7 @@ pub fn regexp_match( .map(|pattern| pattern.map(|pattern| pattern.to_string())), ), }; + array .iter() .zip(complete_pattern) @@ -209,9 +210,14 @@ pub fn regexp_match( }; match re.captures(value) { Some(caps) => { - for m in caps.iter().skip(1).flatten() { + let mut iter = caps.iter(); + if caps.len() > 1 { + iter.next(); + } + for m in iter.flatten() { list_builder.values().append_value(m.as_str()); } + list_builder.append(true); } None => list_builder.append(false), @@ -282,6 +288,20 @@ mod tests { assert_eq!(&expected, result); } + #[test] + fn test_single_group_not_skip_match() { + let array = StringArray::from(vec![Some("foo"), Some("bar")]); + let pattern = GenericStringArray::<i32>::from(vec![r"foo"]); + let actual = regexp_match(&array, &pattern, None).unwrap(); + let result = actual.as_any().downcast_ref::<ListArray>().unwrap(); + let elem_builder: GenericStringBuilder<i32> = GenericStringBuilder::new(); + let mut expected_builder = ListBuilder::new(elem_builder); + expected_builder.values().append_value("foo"); + expected_builder.append(true); + let expected = expected_builder.finish(); + assert_eq!(&expected, result); + } + macro_rules! test_flag_utf8 { ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => { #[test]