Skip to content

Commit

Permalink
feat(parser): parse regular expression with regex parser (#4998)
Browse files Browse the repository at this point in the history
Many false positives and incorrect errors. @leaysgur Enjoy 😁

Run `just conformance` to update the snapshot.
  • Loading branch information
Boshen committed Aug 22, 2024
1 parent 05fff16 commit afe728a
Show file tree
Hide file tree
Showing 23 changed files with 12,774 additions and 9,627 deletions.
3 changes: 2 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ oxc_traverse = { version = "0.24.3", path = "crates/oxc_traverse" }
oxc_module_lexer = { version = "0.24.3", path = "crates/oxc_module_lexer" }
oxc_cfg = { version = "0.24.3", path = "crates/oxc_cfg" }
oxc_isolated_declarations = { version = "0.24.3", path = "crates/oxc_isolated_declarations" }
oxc_regular_expression = { version = "0.24.3", path = "crates/oxc_regular_expression" }
oxc_transform_napi = { version = "0.24.3", path = "napi/transform" }

# publish = false
Expand Down
11 changes: 6 additions & 5 deletions crates/oxc_parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@ workspace = true
doctest = false

[dependencies]
oxc_allocator = { workspace = true }
oxc_span = { workspace = true }
oxc_ast = { workspace = true }
oxc_syntax = { workspace = true }
oxc_diagnostics = { workspace = true }
oxc_allocator = { workspace = true }
oxc_span = { workspace = true }
oxc_ast = { workspace = true }
oxc_syntax = { workspace = true }
oxc_diagnostics = { workspace = true }
oxc_regular_expression = { workspace = true }

assert-unchecked = { workspace = true }
bitflags = { workspace = true }
Expand Down
29 changes: 28 additions & 1 deletion crates/oxc_parser/src/js/expression.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use oxc_allocator::Box;
use oxc_ast::ast::*;
use oxc_diagnostics::Result;
use oxc_regular_expression::ast::Pattern;
use oxc_span::{Atom, Span};
use oxc_syntax::{
number::{BigintBase, NumberBase},
Expand Down Expand Up @@ -342,15 +343,41 @@ impl<'a> ParserImpl<'a> {
let (pattern_end, flags) = self.read_regex();
let pattern_start = self.cur_token().start + 1; // +1 to exclude `/`
let pattern = &self.source_text[pattern_start as usize..pattern_end as usize];

self.bump_any();

let _pattern = self
.options
.parse_regular_expression
.then(|| self.parse_regex_pattern(pattern_start, pattern, flags));

self.ast.reg_exp_literal(
self.end_span(span),
EmptyObject,
RegExp { pattern: self.ast.atom(pattern), flags },
)
}

/// Parses the body of a regular expression literal with the dedicated
/// pattern parser from `oxc_regular_expression`.
///
/// * `span_offset` - forwarded unchanged to `ParserOptions::span_offset`.
/// * `pattern` - the pattern source text handed to the pattern parser.
/// * `flags` - the literal's flags; `u` or `v` turns on unicode mode and
///   `v` additionally turns on unicode sets mode.
///
/// Returns `Some(pattern)` when parsing succeeds. On failure the diagnostic
/// is reported through `self.error` and `None` is returned.
fn parse_regex_pattern(
    &mut self,
    span_offset: u32,
    pattern: &'a str,
    flags: RegExpFlags,
) -> Option<Pattern<'a>> {
    use oxc_regular_expression::{ParserOptions, PatternParser};

    // The `v` flag enables both modes, so derive it first and reuse it.
    let unicode_sets_mode = flags.contains(RegExpFlags::V);
    let unicode_mode = flags.contains(RegExpFlags::U) || unicode_sets_mode;
    let options = ParserOptions { span_offset, unicode_mode, unicode_sets_mode };

    let outcome = PatternParser::new(self.ast.allocator, pattern, options).parse();

    // Record the diagnostic on the parser, then collapse the error case to `None`.
    outcome.map_err(|diagnostic| self.error(diagnostic)).ok()
}

pub(crate) fn parse_literal_string(&mut self) -> Result<StringLiteral<'a>> {
if !self.at(Kind::Str) {
return Err(self.unexpected());
Expand Down
11 changes: 10 additions & 1 deletion crates/oxc_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ pub struct ParserReturn<'a> {
/// Parse options
#[derive(Debug, Clone, Copy)]
pub struct ParseOptions {
/// Whether to parse regular expressions or not.
///
/// Default: false
pub parse_regular_expression: bool,

/// Allow return outside of function
///
/// By default, a return statement at the top level raises an error.
Expand All @@ -124,7 +129,11 @@ pub struct ParseOptions {

impl Default for ParseOptions {
fn default() -> Self {
Self { allow_return_outside_function: false, preserve_parens: true }
Self {
parse_regular_expression: false,
allow_return_outside_function: false,
preserve_parens: true,
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion crates/oxc_regular_expression/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "oxc_regular_expression"
version = "0.0.0"
version = "0.24.3"
publish = false
authors.workspace = true
categories.workspace = true
Expand Down
2 changes: 1 addition & 1 deletion crates/oxc_regular_expression/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# oxc_regexp_parser
# oxc_regular_expression

Implements ECMAScript® 2024 Language Specification

Expand Down
12 changes: 10 additions & 2 deletions crates/oxc_regular_expression/src/body_parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ mod test {
use crate::{ParserOptions, PatternParser};
use oxc_allocator::Allocator;

// NOTE: These may be useless when integration tests are added
#[test]
fn should_pass() {
let allocator = Allocator::default();
Expand Down Expand Up @@ -40,6 +39,7 @@ mod test {
(r"^(?=ab)\b(?!cd)(?<=ef)\B(?<!gh)$", ParserOptions::default()),
("a.b..", ParserOptions::default()),
(r"\d\D\s\S\w\W", ParserOptions::default()),
(r"\x", ParserOptions::default()),
(
r"\p{Emoji_Presentation}\P{Script_Extensions=Latin}\p{Sc}|\p{Basic_Emoji}",
ParserOptions::default(),
Expand All @@ -48,8 +48,10 @@ mod test {
r"\p{Emoji_Presentation}\P{Script_Extensions=Latin}\p{Sc}|\p{P}",
ParserOptions::default().with_unicode_mode(),
),
(r"^\p{General_Category=cntrl}+$", ParserOptions::default().with_unicode_mode()),
(r"\p{Basic_Emoji}", ParserOptions::default().with_unicode_sets_mode()),
(r"\n\cM\0\x41\u1f60\.\/", ParserOptions::default()),
(r"\c0", ParserOptions::default()),
(r"\0", ParserOptions::default()),
(r"\0", ParserOptions::default().with_unicode_mode()),
(r"\u", ParserOptions::default()),
Expand Down Expand Up @@ -137,7 +139,8 @@ mod test {
("a{,", ParserOptions::default().with_unicode_mode()),
("(?=a", ParserOptions::default()),
("(?<!a", ParserOptions::default()),
(r"\xa", ParserOptions::default()),
(r"\c0", ParserOptions::default().with_unicode_mode()),
(r"\xa", ParserOptions::default().with_unicode_mode()),
(r"a\u", ParserOptions::default().with_unicode_mode()),
(r"\p{Emoji_Presentation", ParserOptions::default().with_unicode_mode()),
(r"\p{Script=", ParserOptions::default().with_unicode_mode()),
Expand All @@ -152,6 +155,10 @@ mod test {
("a(?:", ParserOptions::default()),
("(a", ParserOptions::default()),
("(?<a>", ParserOptions::default()),
(r"(?<a\>.)", ParserOptions::default()),
(r"(?<a\>.)", ParserOptions::default().with_unicode_mode()),
(r"(?<\>.)", ParserOptions::default()),
(r"(?<\>.)", ParserOptions::default().with_unicode_mode()),
("(?)", ParserOptions::default()),
("(?=a){1}", ParserOptions::default().with_unicode_mode()),
("(?!a){1}", ParserOptions::default().with_unicode_mode()),
Expand Down Expand Up @@ -183,6 +190,7 @@ mod test {
let allocator = Allocator::default();

for (source_text, options, is_err) in &[
// No tests for 4,294,967,295 left parens
(r"(?<n>..)(?<n>..)", ParserOptions::default(), true),
(r"a{2,1}", ParserOptions::default(), true),
(r"(?<a>)\k<n>", ParserOptions::default(), true),
Expand Down
Loading

0 comments on commit afe728a

Please sign in to comment.