Skip to content

Commit

Permalink
implement non-capturing regex groups and properly reset lastIndex to 0
Browse files Browse the repository at this point in the history
  • Loading branch information
y21 committed Dec 24, 2023
1 parent f6d7b42 commit 4ae8aa1
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 5 deletions.
1 change: 1 addition & 0 deletions cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ edition = "2018"

[features]
nodejs = []
stress_gc = ["dash_vm/stress_gc"]

[dependencies]
anyhow = "1.0"
Expand Down
11 changes: 10 additions & 1 deletion crates/dash_regex/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@ pub enum CharacterClassItem {
Range(u8, u8),
}

/// Describes whether a regex group records what it matched as a numbered capture.
#[cfg_attr(feature = "format", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq)]
pub enum GroupCaptureMode {
/// Non-capturing group, written `(?:...)`: it groups nodes but is given no capture index.
None,
/// Capturing group, written `(...)`: the payload is the capture index assigned to
/// this group by the parser.
Id(usize),
}

#[cfg_attr(feature = "format", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq)]
pub enum Node {
Expand All @@ -23,7 +32,7 @@ pub enum Node {
Anchor(Anchor),
Or(Vec<Node>, Vec<Node>),
Optional(Box<Node>),
Group(Vec<Node>),
Group(GroupCaptureMode, Vec<Node>),
}

impl Node {
Expand Down
24 changes: 21 additions & 3 deletions crates/dash_regex/src/parser.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
use std::mem;

use crate::error::Error;
use crate::node::{Anchor, CharacterClassItem, MetaSequence, Node};
use crate::node::{Anchor, CharacterClassItem, GroupCaptureMode, MetaSequence, Node};

/// Parser state for a regex pattern given as raw bytes.
pub struct Parser<'a> {
/// Cursor into `input`; initialised to 0 by `new` and moved forward by `advance`.
index: usize,
/// The pattern bytes being parsed.
input: &'a [u8],
/// Capture index to hand to the next capturing group. Initialised to 1 by `new`,
/// because index 0 is the entire match.
group_index: usize,
}

impl<'a> Parser<'a> {
pub fn new(input: &'a [u8]) -> Self {
Self { index: 0, input }
Self {
index: 0,
input,
group_index: 1, // 0 is the entire match
}
}

/// Advances the index and returns the previous byte
Expand Down Expand Up @@ -153,6 +158,19 @@ impl<'a> Parser<'a> {

fn parse_group(&mut self) -> Result<Node, Error> {
let mut nodes = Vec::new();
// Decide how this group takes part in capture numbering:
//   `(?:...)` -> non-capturing; the `?:` prefix is consumed here
//   `(...)`   -> capturing; takes the next free capture index
//
// Previously a plain `(...)` fell through to `GroupCaptureMode::None`, so ordinary
// groups were never assigned a capture index.
let has_prefix = self.current() == Some(b'?');
if has_prefix {
    self.advance();
}
let capture_mode = if has_prefix && self.current() == Some(b':') {
    self.advance();
    GroupCaptureMode::None
} else {
    // NOTE(review): any other `(?x...)` prefix is not recognised. As before, the `?`
    // has been consumed and the group is numbered like a plain capturing group.
    let id = self.group_index;
    self.group_index += 1;
    GroupCaptureMode::Id(id)
};

while !self.is_eof() {
match self.current() {
Expand All @@ -178,7 +196,7 @@ impl<'a> Parser<'a> {
}
}

Ok(Node::Group(nodes))
Ok(Node::Group(capture_mode, nodes))
}

fn parse_escape(&mut self) -> Result<Node, Error> {
Expand Down
2 changes: 1 addition & 1 deletion crates/dash_regex/src/visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ impl<'a> Visit<'a> for Node {
node.matches(s);
true
}
Node::Group(group) => group.iter().all(|node| node.matches(s)),
Node::Group(_, group) => group.iter().all(|node| node.matches(s)),
Node::Or(left, right) => {
let left_index = s.index();
let left_matches = left.iter().all(|node| node.matches(s));
Expand Down
1 change: 1 addition & 0 deletions crates/dash_vm/src/js_std/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ pub fn test(cx: CallContext) -> Result<Value, Value> {
last_index.set(last_index.get() + m.groups[0].end);
Ok(Value::Boolean(true))
} else {
last_index.set(0);
Ok(Value::Boolean(false))
}
}

0 comments on commit 4ae8aa1

Please sign in to comment.