Skip to content

Commit

Permalink
fix: regexp_match skips first match (apache#3807)
Browse files Browse the repository at this point in the history
  • Loading branch information
Weijun-H authored Mar 7, 2023
1 parent 14544fb commit 379bd23
Showing 1 changed file with 21 additions and 1 deletion.
22 changes: 21 additions & 1 deletion arrow-string/src/regexp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ pub fn regexp_match<OffsetSize: OffsetSizeTrait>(
.map(|pattern| pattern.map(|pattern| pattern.to_string())),
),
};

array
.iter()
.zip(complete_pattern)
Expand Down Expand Up @@ -209,9 +210,14 @@ pub fn regexp_match<OffsetSize: OffsetSizeTrait>(
};
match re.captures(value) {
Some(caps) => {
for m in caps.iter().skip(1).flatten() {
let mut iter = caps.iter();
if caps.len() > 1 {
iter.next();
}
for m in iter.flatten() {
list_builder.values().append_value(m.as_str());
}

list_builder.append(true);
}
None => list_builder.append(false),
Expand Down Expand Up @@ -282,6 +288,20 @@ mod tests {
assert_eq!(&expected, result);
}

// Regression test: a pattern with no explicit capture group ("foo") must
// still report its overall match instead of producing an empty list.
#[test]
fn test_single_group_not_skip_match() {
    let input = StringArray::from(vec![Some("foo"), Some("bar")]);
    let regex = GenericStringArray::<i32>::from(vec![r"foo"]);

    let matched = regexp_match(&input, &regex, None).unwrap();
    let matched = matched.as_any().downcast_ref::<ListArray>().unwrap();

    // Expected output: one valid list row holding the single value "foo".
    // NOTE(review): only one row is expected although `input` has two values —
    // presumably because the pattern array has a single element; confirm
    // against `regexp_match`'s pairing of values and patterns.
    let mut want_builder = ListBuilder::new(GenericStringBuilder::<i32>::new());
    want_builder.values().append_value("foo");
    want_builder.append(true);
    let want = want_builder.finish();

    assert_eq!(&want, matched);
}

macro_rules! test_flag_utf8 {
($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
#[test]
Expand Down

0 comments on commit 379bd23

Please sign in to comment.