From f6d7b42316b3ad18b5413d27c6f02f7fdcf6146d Mon Sep 17 00:00:00 2001 From: y21 <30553356+y21@users.noreply.github.com> Date: Sun, 24 Dec 2023 20:26:45 +0100 Subject: [PATCH] implement `lastIndex` regex logic --- Cargo.lock | 1 + crates/dash_regex/Cargo.toml | 1 + crates/dash_regex/src/lib.rs | 2 +- crates/dash_regex/src/matcher.rs | 19 ++++++++++++++++--- crates/dash_vm/src/js_std/regex.rs | 18 ++++++++++++++---- crates/dash_vm/src/value/regex.rs | 19 ++++++++++++------- 6 files changed, 45 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 462c345e..78144f5d 100755 --- a/Cargo.lock +++ b/Cargo.lock @@ -498,6 +498,7 @@ name = "dash_regex" version = "0.1.0" dependencies = [ "serde", + "smallvec", "thiserror", ] diff --git a/crates/dash_regex/Cargo.toml b/crates/dash_regex/Cargo.toml index 5dfc60ec..f8198cd5 100644 --- a/crates/dash_regex/Cargo.toml +++ b/crates/dash_regex/Cargo.toml @@ -11,3 +11,4 @@ format = ["serde"] [dependencies] thiserror = "1.0.37" serde = { version = "1.0", features = ["derive", "rc"], optional = true } +smallvec = { version = "1.9.0", features = ["const_generics"] } diff --git a/crates/dash_regex/src/lib.rs b/crates/dash_regex/src/lib.rs index 9e861bcc..9a1c4204 100644 --- a/crates/dash_regex/src/lib.rs +++ b/crates/dash_regex/src/lib.rs @@ -22,7 +22,7 @@ pub fn test() { fn matches(regex: &str, input: &str) -> bool { let nodes = Parser::new(regex.as_bytes()).parse_all().unwrap(); let mut matcher = Matcher::new(&nodes, input.as_bytes()); - matcher.matches() + matcher.matches().is_some() } const HEX_REGEX: &str = "^#?([0-9a-fA-F]{2})([0-9a-fA-F]{2})([0-9a-fA-F]{2})([0-9a-fA-F]{2})$"; diff --git a/crates/dash_regex/src/matcher.rs b/crates/dash_regex/src/matcher.rs index 2448975f..09609a96 100644 --- a/crates/dash_regex/src/matcher.rs +++ b/crates/dash_regex/src/matcher.rs @@ -1,3 +1,7 @@ +use std::ops::Range; + +use smallvec::{smallvec, SmallVec}; + use crate::node::Node; use crate::stream::BorrowedStream; use crate::visitor::Visit; @@ -7,6 +11,11 @@ pub struct Matcher<'a> { text: BorrowedStream<'a, u8>, } +#[derive(Debug)] +pub struct Match { + pub groups: SmallVec<[Range; 1]>, +} + impl<'a> Matcher<'a> { pub fn new(nodes: &'a [Node], text: &'a [u8]) -> Self { Self { @@ -15,13 +24,17 @@ impl<'a> Matcher<'a> { } } - pub fn matches(&mut self) -> bool { + pub fn matches(&mut self) -> Option { let mut index = self.text.index(); + // TODO: what if text.len() == 0? + while index < self.text.len() { if self.nodes.is_eof() { // all regex nodes matched - return true; + return Some(Match { + groups: smallvec![index..self.text.index()], + }); } if !self.matches_single() { @@ -31,7 +44,7 @@ impl<'a> Matcher<'a> { } } - false + None } pub fn matches_single(&mut self) -> bool { diff --git a/crates/dash_vm/src/js_std/regex.rs b/crates/dash_vm/src/js_std/regex.rs index ec9ac8d7..490d7d92 100644 --- a/crates/dash_vm/src/js_std/regex.rs +++ b/crates/dash_vm/src/js_std/regex.rs @@ -1,7 +1,7 @@ use crate::throw; use crate::value::function::native::CallContext; use crate::value::ops::conversions::ValueConversion; -use crate::value::regex::RegExp; +use crate::value::regex::{RegExp, RegExpInner}; use crate::value::{Value, ValueContext}; use dash_regex::matcher::Matcher as RegexMatcher; use dash_regex::parser::Parser as RegexParser; @@ -27,11 +27,21 @@ pub fn test(cx: CallContext) -> Result { None => throw!(cx.scope, TypeError, "Receiver must be a RegExp"), }; - let (nodes, _) = match regex.inner() { + let RegExpInner { nodes, last_index, .. } = match regex.inner() { Some(nodes) => nodes, None => throw!(cx.scope, TypeError, "Receiver must be an initialized RegExp object"), }; - let mut matcher = RegexMatcher::new(nodes, text.as_bytes()); - Ok(Value::Boolean(matcher.matches())) + if last_index.get() >= text.len() { + last_index.set(0); + return Ok(Value::Boolean(false)); + } + + let mut matcher = RegexMatcher::new(nodes, text[last_index.get()..].as_bytes()); + if let Some(m) = matcher.matches() { + last_index.set(last_index.get() + m.groups[0].end); + Ok(Value::Boolean(true)) + } else { + Ok(Value::Boolean(false)) + } } diff --git a/crates/dash_vm/src/value/regex.rs b/crates/dash_vm/src/value/regex.rs index 73f7dac8..2b9aea01 100644 --- a/crates/dash_vm/src/value/regex.rs +++ b/crates/dash_vm/src/value/regex.rs @@ -1,3 +1,4 @@ +use std::cell::Cell; use std::rc::Rc; use dash_proc_macro::Trace; @@ -9,8 +10,10 @@ use super::object::{NamedObject, Object}; #[derive(Debug)] pub struct RegExpInner { - nodes: Vec, - source: Rc, + pub nodes: Vec, + pub source: Rc, + // TODO: this should only exist if the `g` flag is set (we currently don't even have regex flags) + pub last_index: Cell, } #[derive(Debug, Trace)] @@ -25,7 +28,11 @@ impl RegExp { let ctor = vm.statics.regexp_ctor.clone(); Self { - inner: Some(RegExpInner { nodes, source }), + inner: Some(RegExpInner { + nodes, + source, + last_index: Cell::new(0), + }), object: NamedObject::with_prototype_and_constructor(proto, ctor), } } @@ -37,10 +44,8 @@ impl RegExp { } } - pub fn inner(&self) -> Option<(&[Node], &str)> { - self.inner - .as_ref() - .map(|inner| (inner.nodes.as_slice(), inner.source.as_ref())) + pub fn inner(&self) -> Option<&RegExpInner> { + self.inner.as_ref() } }