Skip to content

Commit

Permalink
implement lastIndex regex logic
Browse files Browse the repository at this point in the history
  • Loading branch information
y21 committed Dec 24, 2023
1 parent e02aa62 commit f6d7b42
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 15 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/dash_regex/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ format = ["serde"]
[dependencies]
thiserror = "1.0.37"
serde = { version = "1.0", features = ["derive", "rc"], optional = true }
smallvec = { version = "1.9.0", features = ["const_generics"] }
2 changes: 1 addition & 1 deletion crates/dash_regex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ pub fn test() {
/// Helper: parses `regex` into nodes, runs a `Matcher` over `input`, and reports
/// whether a match was found.
///
/// Panics (via `unwrap`) if the pattern fails to parse.
fn matches(regex: &str, input: &str) -> bool {
let nodes = Parser::new(regex.as_bytes()).parse_all().unwrap();
let mut matcher = Matcher::new(&nodes, input.as_bytes());
matcher.matches()
// `Matcher::matches` returns `Option<Match>`; only success/failure matters here.
matcher.matches().is_some()
}

const HEX_REGEX: &str = "^#?([0-9a-fA-F]{2})([0-9a-fA-F]{2})([0-9a-fA-F]{2})([0-9a-fA-F]{2})$";
Expand Down
19 changes: 16 additions & 3 deletions crates/dash_regex/src/matcher.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
use std::ops::Range;

use smallvec::{smallvec, SmallVec};

use crate::node::Node;
use crate::stream::BorrowedStream;
use crate::visitor::Visit;
Expand All @@ -7,6 +11,11 @@ pub struct Matcher<'a> {
text: BorrowedStream<'a, u8>,
}

/// Outcome of a successful match attempt, as returned by `Matcher::matches`.
#[derive(Debug)]
pub struct Match {
/// Index ranges into the text the matcher was given.
///
/// `Matcher::matches` currently fills in exactly one entry, built as
/// `index..self.text.index()` at the point where all regex nodes have matched,
/// so `groups[0].end` is the stream position right after the match.
// NOTE(review): no capture-group ranges are pushed in the visible code; presumably
// later entries are reserved for capture groups — confirm.
pub groups: SmallVec<[Range<usize>; 1]>,
}

impl<'a> Matcher<'a> {
pub fn new(nodes: &'a [Node], text: &'a [u8]) -> Self {
Self {
Expand All @@ -15,13 +24,17 @@ impl<'a> Matcher<'a> {
}
}

pub fn matches(&mut self) -> bool {
pub fn matches(&mut self) -> Option<Match> {
let mut index = self.text.index();

// TODO: what if text.len() == 0?

while index < self.text.len() {
if self.nodes.is_eof() {
// all regex nodes matched
return true;
return Some(Match {
groups: smallvec![index..self.text.index()],
});
}

if !self.matches_single() {
Expand All @@ -31,7 +44,7 @@ impl<'a> Matcher<'a> {
}
}

false
None
}

pub fn matches_single(&mut self) -> bool {
Expand Down
18 changes: 14 additions & 4 deletions crates/dash_vm/src/js_std/regex.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::throw;
use crate::value::function::native::CallContext;
use crate::value::ops::conversions::ValueConversion;
use crate::value::regex::RegExp;
use crate::value::regex::{RegExp, RegExpInner};
use crate::value::{Value, ValueContext};
use dash_regex::matcher::Matcher as RegexMatcher;
use dash_regex::parser::Parser as RegexParser;
Expand All @@ -27,11 +27,21 @@ pub fn test(cx: CallContext) -> Result<Value, Value> {
None => throw!(cx.scope, TypeError, "Receiver must be a RegExp"),
};

let (nodes, _) = match regex.inner() {
let RegExpInner { nodes, last_index, .. } = match regex.inner() {
Some(nodes) => nodes,
None => throw!(cx.scope, TypeError, "Receiver must be an initialized RegExp object"),
};

let mut matcher = RegexMatcher::new(nodes, text.as_bytes());
Ok(Value::Boolean(matcher.matches()))
if last_index.get() >= text.len() {
last_index.set(0);
return Ok(Value::Boolean(false));
}

let mut matcher = RegexMatcher::new(nodes, text[last_index.get()..].as_bytes());
if let Some(m) = matcher.matches() {
last_index.set(last_index.get() + m.groups[0].end);
Ok(Value::Boolean(true))
} else {
Ok(Value::Boolean(false))
}
}
19 changes: 12 additions & 7 deletions crates/dash_vm/src/value/regex.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::cell::Cell;
use std::rc::Rc;

use dash_proc_macro::Trace;
Expand All @@ -9,8 +10,10 @@ use super::object::{NamedObject, Object};

/// Data held by a `RegExp` that has been constructed with a pattern; `RegExp::inner`
/// hands out a reference to it.
#[derive(Debug)]
pub struct RegExpInner {
nodes: Vec<Node>,
source: Rc<str>,
/// Regex nodes passed to the matcher when the pattern is run.
pub nodes: Vec<Node>,
// NOTE(review): presumably the original pattern text — confirm against the constructor's callers.
pub source: Rc<str>,
// TODO: this should only exist if the `g` flag is set (we currently don't even have regex flags)
/// Offset into the subject string at which the next `test` call starts matching.
///
/// Starts at 0. `test` advances it by the end of the match (`groups[0].end`) on
/// success, and resets it to 0 when it is at or past the end of the subject string.
/// Stored in a `Cell` so it can be updated through the shared reference returned by
/// `RegExp::inner`.
pub last_index: Cell<usize>,
}

#[derive(Debug, Trace)]
Expand All @@ -25,7 +28,11 @@ impl RegExp {
let ctor = vm.statics.regexp_ctor.clone();

Self {
inner: Some(RegExpInner { nodes, source }),
inner: Some(RegExpInner {
nodes,
source,
last_index: Cell::new(0),
}),
object: NamedObject::with_prototype_and_constructor(proto, ctor),
}
}
Expand All @@ -37,10 +44,8 @@ impl RegExp {
}
}

pub fn inner(&self) -> Option<(&[Node], &str)> {
self.inner
.as_ref()
.map(|inner| (inner.nodes.as_slice(), inner.source.as_ref()))
pub fn inner(&self) -> Option<&RegExpInner> {
self.inner.as_ref()
}
}

Expand Down

0 comments on commit f6d7b42

Please sign in to comment.