Skip to content

Commit

Permalink
add RegExp.prototype.exec
Browse files Browse the repository at this point in the history
  • Loading branch information
y21 committed Dec 25, 2023
1 parent 4ae8aa1 commit 9cfb261
Show file tree
Hide file tree
Showing 11 changed files with 149 additions and 44 deletions.
2 changes: 1 addition & 1 deletion crates/dash_middle/src/compiler/constant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ pub enum Constant {
Identifier(Rc<str>),
Boolean(bool),
Function(Rc<Function>),
Regex(dash_regex::Regex, Rc<str>),
Regex(dash_regex::ParsedRegex, Rc<str>),
Null,
Undefined,
}
Expand Down
4 changes: 2 additions & 2 deletions crates/dash_middle/src/parser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ impl ExprKind {
Self::Literal(LiteralExpr::Undefined)
}

pub fn regex_literal(regex: dash_regex::Regex, source: Symbol) -> Self {
pub fn regex_literal(regex: dash_regex::ParsedRegex, source: Symbol) -> Self {
Self::Literal(LiteralExpr::Regex(regex, source))
}

Expand Down Expand Up @@ -499,7 +499,7 @@ pub enum LiteralExpr {
String(Symbol),

#[display(fmt = "/{_1}/")]
Regex(dash_regex::Regex, Symbol),
Regex(dash_regex::ParsedRegex, Symbol),

#[display(fmt = "null")]
Null,
Expand Down
21 changes: 19 additions & 2 deletions crates/dash_regex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ pub mod parser;
mod stream;
mod visitor;

pub type Regex = Vec<Node>;
pub use parser::ParsedRegex;

#[cfg(test)]
#[test]
Expand All @@ -22,7 +22,20 @@ pub fn test() {
fn matches(regex: &str, input: &str) -> bool {
let nodes = Parser::new(regex.as_bytes()).parse_all().unwrap();
let mut matcher = Matcher::new(&nodes, input.as_bytes());
matcher.matches().is_some()
matcher.matches()
}

/// Returns `true` when `regex` matches `input` and every capture group after
/// slot 0 captured exactly the corresponding string in `groups`, in order.
///
/// Panics if `regex` fails to parse (this is a test helper).
fn matches_groups(regex: &str, input: &str, groups: &[&str]) -> bool {
    let parsed = Parser::new(regex.as_bytes()).parse_all().unwrap();
    let mut matcher = Matcher::new(&parsed, input.as_bytes());

    if !matcher.matches() {
        return false;
    }

    // Slot 0 holds the whole match, so the regex must report one more group
    // than the caller expects. Comparing with `+ 1` (rather than subtracting
    // from `group_count`) cannot underflow when `group_count` is 0.
    if parsed.group_count != groups.len() + 1 {
        return false;
    }

    matcher
        .groups
        .iter()
        .skip(1)
        .zip(groups)
        .all(|(span, expected)| span.map(|range| &input[range]) == Some(*expected))
}

const HEX_REGEX: &str = "^#?([0-9a-fA-F]{2})([0-9a-fA-F]{2})([0-9a-fA-F]{2})([0-9a-fA-F]{2})$";
Expand All @@ -33,4 +46,8 @@ pub fn test() {
assert!(matches("\\d", "a1"));
assert!(matches("V\\dX", "aV1aVaXaV1Xs"));
assert!(!matches("V\\dX", "aV1aVaXaV?Xs"));

const RGB: &str = r"rgb[\s|\(]+((?:[-\+]?\d*\.\d+%?)|(?:[-\+]?\d+%?))[,|\s]+((?:[-\+]?\d*\.\d+%?)|(?:[-\+]?\d+%?))[,|\s]+((?:[-\+]?\d*\.\d+%?)|(?:[-\+]?\d+%?))\s*\)?";
assert!(matches(RGB, "rgb(255, 255, 255)"));
assert!(matches_groups(RGB, "rgb(144, 17, 9)", &["144", "17", "9"]));
}
40 changes: 29 additions & 11 deletions crates/dash_regex/src/matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,38 +3,56 @@ use std::ops::Range;
use smallvec::{smallvec, SmallVec};

use crate::node::Node;
use crate::parser::ParsedRegex;
use crate::stream::BorrowedStream;
use crate::visitor::Visit;

pub struct Matcher<'a> {
nodes: BorrowedStream<'a, Node>,
text: BorrowedStream<'a, u8>,
pub groups: Groups,
}

#[derive(Debug)]
pub struct Match {
pub groups: SmallVec<[Range<usize>; 1]>,
/// Spans recorded for each capture group during a match attempt.
///
/// Each slot is `None` until a span is stored with [`Groups::set`]. The
/// matcher stores the span of the whole match in slot 0, and capturing groups
/// store their span under their group id.
#[derive(Debug, Clone)]
pub struct Groups(SmallVec<[Option<Range<usize>>; 1]>);

impl Groups {
    /// Creates `count` empty (unset) group slots.
    pub fn new(count: usize) -> Self {
        Self(smallvec![None; count])
    }

    /// Records `range` as the span captured by group `index`, replacing any
    /// previously stored span.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not smaller than the `count` passed to
    /// [`Groups::new`].
    pub fn set(&mut self, index: usize, range: Range<usize>) {
        self.0[index] = Some(range);
    }

    /// Returns the span stored for group `index`, or `None` if the group has
    /// not captured anything or `index` is out of range.
    ///
    /// This is a read-only lookup, so it only needs `&self`; callers holding
    /// a `&mut Groups` can still call it unchanged.
    pub fn get(&self, index: usize) -> Option<Range<usize>> {
        self.0.get(index).cloned().flatten()
    }

    /// Iterates over all group slots in index order, yielding a clone of each
    /// stored span (`None` for unset groups).
    pub fn iter(&self) -> impl Iterator<Item = Option<Range<usize>>> + '_ {
        self.0.iter().cloned()
    }
}

impl<'a> Matcher<'a> {
pub fn new(nodes: &'a [Node], text: &'a [u8]) -> Self {
pub fn new(parsed_regex: &'a ParsedRegex, text: &'a [u8]) -> Self {
Self {
nodes: BorrowedStream::new(nodes),
nodes: BorrowedStream::new(parsed_regex.nodes.as_slice()),
text: BorrowedStream::new(text),
groups: Groups::new(parsed_regex.group_count),
}
}

pub fn matches(&mut self) -> Option<Match> {
pub fn matches(&mut self) -> bool {
let mut index = self.text.index();

// TODO: what if text.len() == 0?

while index < self.text.len() {
if self.nodes.is_eof() {
// all regex nodes matched
return Some(Match {
groups: smallvec![index..self.text.index()],
});
self.groups.set(0, index..self.text.index());
return true;
}

if !self.matches_single() {
Expand All @@ -44,11 +62,11 @@ impl<'a> Matcher<'a> {
}
}

None
false
}

pub fn matches_single(&mut self) -> bool {
let node = self.nodes.next().unwrap();
node.matches(&mut self.text)
node.matches(&mut self.text, &mut self.groups)
}
}
19 changes: 16 additions & 3 deletions crates/dash_regex/src/parser.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use std::mem;

use serde::{Deserialize, Serialize};

use crate::error::Error;
use crate::node::{Anchor, CharacterClassItem, GroupCaptureMode, MetaSequence, Node};

Expand All @@ -9,6 +11,13 @@ pub struct Parser<'a> {
group_index: usize,
}

/// Result of `Parser::parse_all`: the parsed nodes plus the group counter
/// the parser ended with.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "format", derive(Serialize, Deserialize))]
pub struct ParsedRegex {
/// Top-level nodes of the pattern, in the order the parser produced them.
pub nodes: Vec<Node>,
/// Final value of the parser's `group_index`; `Matcher::new` uses it to size
/// its `Groups` storage.
// NOTE(review): presumably this includes slot 0 (the whole match), since the
// matcher stores the full match range at index 0 — confirm against the
// parser's initial `group_index`.
pub group_count: usize,
}

impl<'a> Parser<'a> {
pub fn new(input: &'a [u8]) -> Self {
Self {
Expand Down Expand Up @@ -41,7 +50,7 @@ impl<'a> Parser<'a> {
self.index >= self.input.len()
}

pub fn parse_all(mut self) -> Result<Vec<Node>, Error> {
pub fn parse_all(mut self) -> Result<ParsedRegex, Error> {
let mut nodes = Vec::new();
while !self.is_eof() {
if let Some(b'|') = self.current() {
Expand All @@ -62,7 +71,10 @@ impl<'a> Parser<'a> {
nodes.push(self.parse_primary()?);
}
}
Ok(nodes)
Ok(ParsedRegex {
nodes,
group_count: self.group_index,
})
}

fn parse_primary(&mut self) -> Result<Node, Error> {
Expand Down Expand Up @@ -169,7 +181,8 @@ impl<'a> Parser<'a> {
GroupCaptureMode::Id(self.group_index - 1)
}
} else {
GroupCaptureMode::None
self.group_index += 1;
GroupCaptureMode::Id(self.group_index - 1)
};

while !self.is_eof() {
Expand Down
38 changes: 25 additions & 13 deletions crates/dash_regex/src/visitor.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
use crate::node::{Anchor, CharacterClassItem, MetaSequence, Node};
use crate::matcher::Groups;
use crate::node::{Anchor, CharacterClassItem, GroupCaptureMode, MetaSequence, Node};
use crate::stream::BorrowedStream;

pub trait Visit<'a> {
fn matches(&self, s: &mut BorrowedStream<'a, u8>) -> bool;
fn matches(&self, s: &mut BorrowedStream<'a, u8>, groups: &mut Groups) -> bool;
}

impl<'a> Visit<'a> for Anchor {
fn matches(&self, s: &mut BorrowedStream<'a, u8>) -> bool {
fn matches(&self, s: &mut BorrowedStream<'a, u8>, _: &mut Groups) -> bool {
match self {
Anchor::StartOfString => s.index() == 0,
Anchor::EndOfString => s.is_eof(),
Expand All @@ -15,7 +16,7 @@ impl<'a> Visit<'a> for Anchor {
}

impl<'a> Visit<'a> for MetaSequence {
fn matches(&self, s: &mut BorrowedStream<'a, u8>) -> bool {
fn matches(&self, s: &mut BorrowedStream<'a, u8>, _: &mut Groups) -> bool {
match self {
Self::Digit => {
let is_digit = s.current().map(|c| c.is_ascii_digit()).unwrap_or(false);
Expand Down Expand Up @@ -46,7 +47,7 @@ impl<'a> Visit<'a> for MetaSequence {
}

impl<'a> Visit<'a> for Node {
fn matches(&self, s: &mut BorrowedStream<'a, u8>) -> bool {
fn matches(&self, s: &mut BorrowedStream<'a, u8>, groups: &mut Groups) -> bool {
match self {
Node::LiteralCharacter(lit) => {
let matches = s.current().map(|c| c == lit).unwrap_or(false);
Expand All @@ -56,21 +57,32 @@ impl<'a> Visit<'a> for Node {
matches
}
Node::Optional(node) => {
node.matches(s);
node.matches(s, groups);
true
}
Node::Group(_, group) => group.iter().all(|node| node.matches(s)),
Node::Group(capture, group) => {
let before = s.index();
let all_matched = group.iter().all(|node| node.matches(s, groups));

match capture {
GroupCaptureMode::Id(id) if all_matched => {
groups.set(*id, before..s.index());
true
}
_ => all_matched,
}
}
Node::Or(left, right) => {
let left_index = s.index();
let left_matches = left.iter().all(|node| node.matches(s));
let left_matches = left.iter().all(|node| node.matches(s, groups));
if left_matches {
return true;
}
s.set_index(left_index);
right.iter().all(|node| node.matches(s))
right.iter().all(|node| node.matches(s, groups))
}
Node::Anchor(anchor) => anchor.matches(s),
Node::MetaSequence(seq) => seq.matches(s),
Node::Anchor(anchor) => anchor.matches(s, groups),
Node::MetaSequence(seq) => seq.matches(s, groups),
Node::Repetition { node, min, max } => {
let mut count = 0;
while !s.is_eof() {
Expand All @@ -80,7 +92,7 @@ impl<'a> Visit<'a> for Node {
}
}

if !node.matches(s) {
if !node.matches(s, groups) {
break;
}
count += 1;
Expand All @@ -98,7 +110,7 @@ impl<'a> Visit<'a> for Node {
let Some(&cur) = s.current() else { return false };

nodes.iter().any(|node| match *node {
CharacterClassItem::Node(ref node) => node.matches(s),
CharacterClassItem::Node(ref node) => node.matches(s, groups),
CharacterClassItem::Range(start, end) => {
let matches = (start..=end).contains(&cur);
if matches {
Expand Down
52 changes: 48 additions & 4 deletions crates/dash_vm/src/js_std/regex.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use crate::throw;
use crate::value::array::Array;
use crate::value::function::native::CallContext;
use crate::value::object::PropertyValue;
use crate::value::ops::conversions::ValueConversion;
use crate::value::regex::{RegExp, RegExpInner};
use crate::value::{Value, ValueContext};
Expand Down Expand Up @@ -27,7 +29,7 @@ pub fn test(cx: CallContext) -> Result<Value, Value> {
None => throw!(cx.scope, TypeError, "Receiver must be a RegExp"),
};

let RegExpInner { nodes, last_index, .. } = match regex.inner() {
let RegExpInner { regex, last_index, .. } = match regex.inner() {
Some(nodes) => nodes,
None => throw!(cx.scope, TypeError, "Receiver must be an initialized RegExp object"),
};
Expand All @@ -37,12 +39,54 @@ pub fn test(cx: CallContext) -> Result<Value, Value> {
return Ok(Value::Boolean(false));
}

let mut matcher = RegexMatcher::new(nodes, text[last_index.get()..].as_bytes());
if let Some(m) = matcher.matches() {
last_index.set(last_index.get() + m.groups[0].end);
let mut matcher = RegexMatcher::new(regex, text[last_index.get()..].as_bytes());
if matcher.matches() {
last_index.set(last_index.get() + matcher.groups.get(0).unwrap().end);
Ok(Value::Boolean(true))
} else {
last_index.set(0);
Ok(Value::Boolean(false))
}
}

/// Native implementation of `RegExp.prototype.exec`.
///
/// Converts the first argument to a string and runs the receiver's regex
/// against it, starting at the receiver's `last_index`.
///
/// Returns an array object holding one string per group slot (slot 0 is the
/// whole match) on success, or `null` when there is no match or `last_index`
/// is already at or past the end of the text. `last_index` is advanced to the
/// end of the match on success and reset to 0 otherwise.
///
/// Throws a `TypeError` if the receiver is not a `RegExp`, or is a `RegExp`
/// that has not been initialized.
pub fn exec(cx: CallContext<'_, '_>) -> Result<Value, Value> {
    let text = cx.args.first().unwrap_or_undefined().to_string(cx.scope)?;

    let regex = match cx.this.downcast_ref::<RegExp>() {
        Some(regex) => regex,
        None => throw!(cx.scope, TypeError, "Receiver must be a RegExp"),
    };

    let RegExpInner { regex, last_index, .. } = match regex.inner() {
        Some(inner) => inner,
        None => throw!(cx.scope, TypeError, "Receiver must be an initialized RegExp object"),
    };

    let start = last_index.get();
    if start >= text.len() {
        last_index.set(0);
        return Ok(Value::null());
    }

    // The matcher only sees the text from `start` onwards, so every range it
    // reports is relative to `haystack`, not to `text`. Slicing `text` with
    // those ranges would return the wrong substrings whenever `start != 0`.
    let haystack = &text[start..];

    let mut matcher = RegexMatcher::new(regex, haystack.as_bytes());
    if matcher.matches() {
        let whole_match = matcher
            .groups
            .get(0)
            .expect("group 0 is set on every successful match");
        last_index.set(start + whole_match.end);

        let groups = Array::from_vec(
            cx.scope,
            matcher
                .groups
                .iter()
                .map(|g| {
                    let sub = match g {
                        Some(r) => haystack[r].into(),
                        // NOTE(review): ECMAScript specifies `undefined` for
                        // capture groups that did not participate in the
                        // match; this yields whatever `null_str()` holds
                        // (presumably the string "null") — confirm and fix.
                        None => cx.scope.statics.null_str(),
                    };
                    PropertyValue::static_default(Value::String(sub))
                })
                .collect(),
        );
        Ok(Value::Object(cx.scope.register(groups)))
    } else {
        last_index.set(0);
        Ok(Value::null())
    }
}
1 change: 1 addition & 0 deletions crates/dash_vm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -869,6 +869,7 @@ impl Vm {
regexp_ctor.clone(),
[
("test", scope.statics.regexp_test.clone()),
("exec", scope.statics.regexp_exec.clone())
],
[],
[],
Expand Down
2 changes: 2 additions & 0 deletions crates/dash_vm/src/statics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ pub struct Statics {
pub regexp_ctor: Handle<dyn Object>,
pub regexp_prototype: Handle<dyn Object>,
pub regexp_test: Handle<dyn Object>,
pub regexp_exec: Handle<dyn Object>,
pub date_ctor: Handle<dyn Object>,
pub date_prototype: Handle<dyn Object>,
pub date_now: Handle<dyn Object>,
Expand Down Expand Up @@ -503,6 +504,7 @@ impl Statics {
regexp_ctor: function(gc, "RegExp", js_std::regex::constructor),
regexp_prototype: builtin_object(gc, RegExp::empty()),
regexp_test: function(gc, "test", js_std::regex::test),
regexp_exec: function(gc, "exec", js_std::regex::exec),
date_ctor: function(gc, "Date", js_std::date::constructor),
date_prototype: builtin_object(gc, NamedObject::null()),
date_now: function(gc, "now", js_std::date::now),
Expand Down
Loading

0 comments on commit 9cfb261

Please sign in to comment.