From 860a4e679d11b666b6d4229c47db9e21c0e23f60 Mon Sep 17 00:00:00 2001
From: Ubugeeei <ubuge1122@gmail.com>
Date: Fri, 17 Nov 2023 10:28:06 +0900
Subject: [PATCH 1/5] init js_regex

---
 Cargo.lock                             |  10 +
 crates/oxc_js_regex/Cargo.toml         |  21 ++
 crates/oxc_js_regex/README.md          |   5 +
 crates/oxc_js_regex/src/ast.rs         | 387 +++++++++++++++++++++++++
 crates/oxc_js_regex/src/lexer/mod.rs   |   1 +
 crates/oxc_js_regex/src/lexer/token.rs |   1 +
 crates/oxc_js_regex/src/lib.rs         |   5 +
 crates/oxc_js_regex/src/parser.rs      |   0
 crates/oxc_js_regex/src/validator.rs   |   0
 crates/oxc_js_regex/src/visitor.rs     |   0
 10 files changed, 430 insertions(+)
 create mode 100644 crates/oxc_js_regex/Cargo.toml
 create mode 100644 crates/oxc_js_regex/README.md
 create mode 100644 crates/oxc_js_regex/src/ast.rs
 create mode 100644 crates/oxc_js_regex/src/lexer/mod.rs
 create mode 100644 crates/oxc_js_regex/src/lexer/token.rs
 create mode 100644 crates/oxc_js_regex/src/lib.rs
 create mode 100644 crates/oxc_js_regex/src/parser.rs
 create mode 100644 crates/oxc_js_regex/src/validator.rs
 create mode 100644 crates/oxc_js_regex/src/visitor.rs

diff --git a/Cargo.lock b/Cargo.lock
index c07988158702d..6431000ee2c05 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1624,6 +1624,16 @@ dependencies = [
  "static_assertions",
 ]
 
+[[package]]
+name = "oxc_js_regex"
+version = "0.0.0"
+dependencies = [
+ "oxc_allocator",
+ "oxc_index",
+ "oxc_span",
+ "oxc_syntax",
+]
+
 [[package]]
 name = "oxc_linter"
 version = "0.0.0"
diff --git a/crates/oxc_js_regex/Cargo.toml b/crates/oxc_js_regex/Cargo.toml
new file mode 100644
index 0000000000000..895809c4f7b46
--- /dev/null
+++ b/crates/oxc_js_regex/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name                   = "oxc_js_regex"
+version                = "0.0.0"
+publish                = false
+authors.workspace      = true
+categories.workspace   = true
+description.workspace  = true
+edition.workspace      = true
+homepage.workspace     = true
+keywords.workspace     = true
+license.workspace      = true
+repository.workspace   = true
+rust-version.workspace = true
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+oxc_allocator = { workspace = true }
+oxc_span      = { workspace = true }
+oxc_syntax    = { workspace = true }
+oxc_index     = { workspace = true }
diff --git a/crates/oxc_js_regex/README.md b/crates/oxc_js_regex/README.md
new file mode 100644
index 0000000000000..f1e6b2fc5370f
--- /dev/null
+++ b/crates/oxc_js_regex/README.md
@@ -0,0 +1,5 @@
+# oxc_js_regex
+
+⚠️ Work in progress. Do not use yet.
+
+see: https://github.com/oxc-project/oxc/issues/1164
\ No newline at end of file
diff --git a/crates/oxc_js_regex/src/ast.rs b/crates/oxc_js_regex/src/ast.rs
new file mode 100644
index 0000000000000..65cf93b30c1b5
--- /dev/null
+++ b/crates/oxc_js_regex/src/ast.rs
@@ -0,0 +1,387 @@
+//! [`@eslint-community/regexpp`](https://github.com/eslint-community/regexpp/blob/2e8f1af992fb12eae46a446253e8fa3f6cede92a/src/ast.ts)
+
+use oxc_allocator::{Box, Vec};
+use oxc_span::{Atom, Span};
+
+/// The type which includes all nodes.
+#[derive(Debug)]
+pub enum Node<'a> {
+    Branch(Box<'a, Branch<'a>>),
+    Leaf(Box<'a, Leaf<'a>>),
+}
+
+/// The type which includes all branch nodes.
+#[derive(Debug)]
+pub enum Branch<'a> {
+    Alternative(Box<'a, Alternative<'a>>),
+    CapturingGroup(Box<'a, CapturingGroup<'a>>),
+    CharacterClass(Box<'a, CharacterClass<'a>>),
+    CharacterClassRange(Box<'a, CharacterClassRange>),
+    ClassIntersection(Box<'a, ClassIntersection<'a>>),
+    ClassStringDisjunction(Box<'a, ClassStringDisjunction<'a>>),
+    ClassSubtraction(Box<'a, ClassSubtraction<'a>>),
+    ExpressionCharacterClass(Box<'a, ExpressionCharacterClass<'a>>),
+    Group(Box<'a, Group<'a>>),
+    LookaroundAssertion(Box<'a, LookaroundAssertion<'a>>),
+    Pattern(Box<'a, Pattern<'a>>),
+    Quantifier(Box<'a, Quantifier<'a>>),
+    RegExpLiteral(Box<'a, RegExpLiteral<'a>>),
+    StringAlternative(Box<'a, StringAlternative<'a>>),
+}
+
+/// The type which includes all leaf nodes.
+#[derive(Debug)]
+pub enum Leaf<'a> {
+    Backreference(Box<'a, Backreference<'a>>),
+    BoundaryAssertion(Box<'a, BoundaryAssertion<'a>>),
+    Character(Box<'a, Character>),
+    CharacterSet(Box<'a, CharacterSet<'a>>),
+    Flags(Box<'a, Flags>),
+}
+
+/// The type which includes all atom nodes.
+#[derive(Debug)]
+pub enum Element<'a> {
+    Assertion(Box<'a, Assertion<'a>>),
+    QuantifiableElement(Box<'a, QuantifiableElement<'a>>),
+    Quantifier(Box<'a, Quantifier<'a>>),
+}
+
+/// The type which includes all atom nodes that Quantifier node can have as children.
+#[derive(Debug)]
+pub enum QuantifiableElement<'a> {
+    Backreference(Box<'a, Backreference<'a>>),
+    CapturingGroup(Box<'a, CapturingGroup<'a>>),
+    Character(Box<'a, Character>),
+    CharacterClass(Box<'a, CharacterClass<'a>>),
+    CharacterSet(Box<'a, CharacterSet<'a>>),
+    ExpressionCharacterClass(Box<'a, ExpressionCharacterClass<'a>>),
+    Group(Box<'a, Group<'a>>),
+    LookaheadAssertion(Box<'a, LookaheadAssertion<'a>>),
+}
+
+/// The type which includes all character class atom nodes.
+#[derive(Debug)]
+pub enum CharacterClassElement<'a> {
+    ClassRangesCharacterClassElement(Box<'a, ClassRangesCharacterClassElement<'a>>),
+    UnicodeSetsCharacterClassElement(Box<'a, UnicodeSetsCharacterClassElement<'a>>),
+}
+#[derive(Debug)]
+pub enum ClassRangesCharacterClassElement<'a> {
+    Character(Box<'a, Character>),
+    CharacterClassRange(Box<'a, CharacterClassRange>),
+    CharacterUnicodePropertyCharacterSet(Box<'a, CharacterUnicodePropertyCharacterSet>),
+    EscapeCharacterSet(Box<'a, EscapeCharacterSet>),
+}
+#[derive(Debug)]
+pub enum UnicodeSetsCharacterClassElement<'a> {
+    Character(Box<'a, Character>),
+    CharacterClassRange(Box<'a, CharacterClassRange>),
+    ClassStringDisjunction(Box<'a, ClassStringDisjunction<'a>>),
+    EscapeCharacterSet(Box<'a, EscapeCharacterSet>),
+    ExpressionCharacterClass(Box<'a, ExpressionCharacterClass<'a>>),
+    UnicodePropertyCharacterSet(Box<'a, UnicodePropertyCharacterSet<'a>>),
+    UnicodeSetsCharacterClass(Box<'a, UnicodeSetsCharacterClass<'a>>),
+}
+
+/// The root node.
+#[derive(Debug)]
+pub struct RegExpLiteral<'a> {
+    pub span: Span,
+    pub pattern: Pattern<'a>,
+    pub flags: Flags,
+}
+
+/// The pattern.
+#[derive(Debug)]
+pub struct Pattern<'a> {
+    pub span: Span,
+    pub alternatives: Vec<'a, Alternative<'a>>,
+}
+
+/// The alternative.
+/// E.g. `a|b`
+#[derive(Debug)]
+pub struct Alternative<'a> {
+    pub span: Span,
+    pub elements: Vec<'a, Element<'a>>,
+}
+
+/// The uncapturing group.
+/// E.g. `(?:ab)`
+#[derive(Debug)]
+pub struct Group<'a> {
+    pub span: Span,
+    pub alternatives: Vec<'a, Alternative<'a>>,
+}
+
+/// The capturing group.
+/// E.g. `(ab)`, `(?<name>ab)`
+#[derive(Debug)]
+pub struct CapturingGroup<'a> {
+    pub span: Span,
+    pub name: Option<Atom>,
+    pub alternatives: Vec<'a, Alternative<'a>>,
+    pub references: Vec<'a, Backreference<'a>>,
+}
+
+/// The lookaround assertion.
+#[derive(Debug)]
+pub enum LookaroundAssertion<'a> {
+    LookaheadAssertion(Box<'a, LookaheadAssertion<'a>>),
+    LookbehindAssertion(Box<'a, LookbehindAssertion<'a>>),
+}
+
+/// The lookahead assertion.
+/// E.g. `(?=ab)`, `(?!ab)`
+#[derive(Debug)]
+pub struct LookaheadAssertion<'a> {
+    pub span: Span,
+    pub negate: bool,
+    pub alternatives: Vec<'a, Alternative<'a>>,
+}
+
+/// The lookbehind assertion.
+/// E.g. `(?<=ab)`, `(?<!ab)`
+#[derive(Debug)]
+pub struct LookbehindAssertion<'a> {
+    pub span: Span,
+    pub negate: bool,
+    pub alternatives: Vec<'a, Alternative<'a>>,
+}
+
+/// The quantifier.
+/// E.g. `a?`, `a*`, `a+`, `a{1,2}`, `a??`, `a*?`, `a+?`, `a{1,2}?`
+#[derive(Debug)]
+pub struct Quantifier<'a> {
+    pub span: Span,
+    pub min: f64,
+    pub max: f64, // can be f64::INFINITY
+    pub greedy: bool,
+    pub element: QuantifiableElement<'a>,
+}
+
+/// The character class.
+/// E.g. `[ab]`, `[^ab]`
+#[derive(Debug)]
+pub enum CharacterClass<'a> {
+    ClassRangesCharacterClass(Box<'a, ClassRangesCharacterClass<'a>>),
+    UnicodeSetsCharacterClass(Box<'a, UnicodeSetsCharacterClass<'a>>),
+}
+
+/// The character class used in legacy (neither `u` nor `v` flag) and Unicode mode (`u` flag).
+/// This character class is guaranteed to **not** contain strings.
+/// In Unicode sets mode (`v` flag), [`UnicodeSetsCharacterClass`] is used.
+#[derive(Debug)]
+pub struct ClassRangesCharacterClass<'a> {
+    pub span: Span,
+    pub unicode_sets: bool,
+    pub elements: Vec<'a, ClassRangesCharacterClassElement<'a>>,
+}
+
+/// The character class used in Unicode sets mode (`v` flag).
+/// This character class may contain strings.
+#[derive(Debug)]
+pub struct UnicodeSetsCharacterClass<'a> {
+    pub span: Span,
+    pub elements: Vec<'a, UnicodeSetsCharacterClassElement<'a>>,
+}
+
+/// The character class range.
+/// E.g. `[a-b]`
+#[derive(Debug)]
+pub struct CharacterClassRange {
+    pub span: Span,
+    pub min: Character,
+    pub max: Character,
+}
+
+/// The assertion.
+#[derive(Debug)]
+pub enum Assertion<'a> {
+    BoundaryAssertion(Box<'a, BoundaryAssertion<'a>>),
+    LookaroundAssertion(Box<'a, LookaroundAssertion<'a>>),
+}
+
+/// The boundary assertion.
+#[derive(Debug)]
+pub enum BoundaryAssertion<'a> {
+    EdgeAssertion(Box<'a, EdgeAssertion>),
+    WordBoundaryAssertion(Box<'a, WordBoundaryAssertion>),
+}
+
+/// The edge boundary assertion.
+/// E.g. `^`, `$`
+#[derive(Debug)]
+pub struct EdgeAssertion {
+    pub span: Span,
+    pub kind: EdgeAssertionKind,
+}
+
+#[derive(Debug)]
+pub enum EdgeAssertionKind {
+    Start,
+    End,
+}
+
+/// The word boundary assertion.
+/// E.g. `\b`, `\B`
+#[derive(Debug)]
+pub struct WordBoundaryAssertion {
+    pub span: Span,
+    pub negate: bool,
+}
+
+/// The character set.
+#[derive(Debug)]
+pub enum CharacterSet<'a> {
+    AnyCharacterSet,
+    EscapeCharacterSet(Box<'a, EscapeCharacterSet>),
+    UnicodePropertyCharacterSet(Box<'a, UnicodePropertyCharacterSet<'a>>),
+}
+
+/// The character class escape.
+/// E.g. `\d`, `\s`, `\w`, `\D`, `\S`, `\W`
+#[derive(Debug)]
+pub struct EscapeCharacterSet {
+    pub span: Span,
+    pub kind: EscapeCharacterSetKind,
+    pub negate: bool,
+}
+
+#[derive(Debug)]
+pub enum EscapeCharacterSetKind {
+    Digit,
+    Space,
+    Word,
+}
+
+/// The unicode property escape.
+/// E.g. `\p{ASCII}`, `\P{ASCII}`, `\p{Script=Hiragana}`
+#[derive(Debug)]
+pub enum UnicodePropertyCharacterSet<'a> {
+    CharacterUnicodePropertyCharacterSet(Box<'a, CharacterUnicodePropertyCharacterSet>),
+    StringsUnicodePropertyCharacterSet(Box<'a, StringsUnicodePropertyCharacterSet>),
+}
+
+#[derive(Debug)]
+pub struct CharacterUnicodePropertyCharacterSet {
+    pub span: Span,
+    pub key: Atom,
+    pub value: Option<Atom>,
+    pub negate: bool,
+}
+
+/// StringsUnicodePropertyCharacterSet is a Unicode property escape with a property of strings.
+#[derive(Debug)]
+pub struct StringsUnicodePropertyCharacterSet {
+    pub span: Span,
+    pub key: Atom,
+}
+
+/// The expression character class.
+/// E.g. `[a--b]`, `[a&&b]`,`[^a--b]`, `[^a&&b]`
+#[derive(Debug)]
+pub struct ExpressionCharacterClass<'a> {
+    pub span: Span,
+    pub negate: bool,
+    pub expression: ExpressionCharacterClassExpr<'a>,
+}
+
+#[derive(Debug)]
+pub enum ExpressionCharacterClassExpr<'a> {
+    ClassIntersection(Box<'a, ClassIntersection<'a>>),
+    ClassSubtraction(Box<'a, ClassSubtraction<'a>>),
+}
+
+#[derive(Debug)]
+pub enum ClassSetOperand<'a> {
+    Character(Box<'a, Character>),
+    ClassStringDisjunction(Box<'a, ClassStringDisjunction<'a>>),
+    EscapeCharacterSet(Box<'a, EscapeCharacterSet>),
+    ExpressionCharacterClass(Box<'a, ExpressionCharacterClass<'a>>),
+    UnicodePropertyCharacterSet(Box<'a, UnicodePropertyCharacterSet<'a>>),
+    UnicodeSetsCharacterClass(Box<'a, UnicodeSetsCharacterClass<'a>>),
+}
+
+/// The character class intersection.
+/// E.g. `a&&b`
+#[derive(Debug)]
+pub struct ClassIntersection<'a> {
+    pub span: Span,
+    pub left: ClassIntersectionLeft<'a>,
+    pub right: ClassSetOperand<'a>,
+}
+
+#[derive(Debug)]
+pub enum ClassIntersectionLeft<'a> {
+    ClassIntersection(Box<'a, ClassIntersection<'a>>),
+    ClassSetOperand(Box<'a, ClassSetOperand<'a>>),
+}
+
+/// The character class subtraction.
+/// E.g. `a--b`
+#[derive(Debug)]
+pub struct ClassSubtraction<'a> {
+    pub span: Span,
+    pub left: ClassSubtractionLeft<'a>,
+    pub right: ClassSetOperand<'a>,
+}
+
+#[derive(Debug)]
+pub enum ClassSubtractionLeft<'a> {
+    ClassSetOperand(Box<'a, ClassSetOperand<'a>>),
+    ClassSubtraction(Box<'a, ClassSubtraction<'a>>),
+}
+
+/// The character class string disjunction.
+/// E.g. `\q{a|b}`
+#[derive(Debug)]
+pub struct ClassStringDisjunction<'a> {
+    pub span: Span,
+    pub alternatives: Vec<'a, StringAlternative<'a>>,
+}
+
+/// StringAlternative is only used for `\q{alt}` ([`ClassStringDisjunction`]).
+#[derive(Debug)]
+pub struct StringAlternative<'a> {
+    pub span: Span,
+    pub elements: Vec<'a, Character>,
+}
+
+/// A single character, including escape sequences that denote one character.
+/// E.g. `a`, `あ`, `✿`, `\x65`, `\u0065`, `\u{65}`, `\/`
+#[derive(Debug)]
+pub struct Character {
+    pub span: Span,
+    pub value: u16, // NOTE(review): u16 = one UTF-16 code unit; code points > U+FFFF won't fit
+}
+
+#[derive(Debug)]
+pub enum BackreferenceRef {
+    Number(i32), // FIXME:
+    Atom(Atom),
+}
+
+/// The backreference.
+/// E.g. `\1`, `\k<name>`
+#[derive(Debug)]
+pub struct Backreference<'a> {
+    pub span: Span,
+    pub reference: BackreferenceRef, // group number (`\1`) or group name (`\k<name>`)
+    pub resolved: CapturingGroup<'a>, // NOTE(review): group is held by value here — confirm
+}
+
+/// The flags.
+#[derive(Debug)]
+pub struct Flags {
+    pub span: Span,
+    pub dot_all: bool,
+    pub global: bool,
+    pub has_indices: bool,
+    pub ignore_case: bool,
+    pub multiline: bool,
+    pub sticky: bool,
+    pub unicode: bool,
+    pub unicode_sets: bool,
+}
diff --git a/crates/oxc_js_regex/src/lexer/mod.rs b/crates/oxc_js_regex/src/lexer/mod.rs
new file mode 100644
index 0000000000000..40d3ff585686a
--- /dev/null
+++ b/crates/oxc_js_regex/src/lexer/mod.rs
@@ -0,0 +1 @@
+mod token;
diff --git a/crates/oxc_js_regex/src/lexer/token.rs b/crates/oxc_js_regex/src/lexer/token.rs
new file mode 100644
index 0000000000000..8b137891791fe
--- /dev/null
+++ b/crates/oxc_js_regex/src/lexer/token.rs
@@ -0,0 +1 @@
+
diff --git a/crates/oxc_js_regex/src/lib.rs b/crates/oxc_js_regex/src/lib.rs
new file mode 100644
index 0000000000000..6647fb03be8f5
--- /dev/null
+++ b/crates/oxc_js_regex/src/lib.rs
@@ -0,0 +1,5 @@
+pub mod ast;
+mod lexer;
+pub mod parser;
+pub mod validator;
+pub mod visitor;
diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/crates/oxc_js_regex/src/validator.rs b/crates/oxc_js_regex/src/validator.rs
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/crates/oxc_js_regex/src/visitor.rs b/crates/oxc_js_regex/src/visitor.rs
new file mode 100644
index 0000000000000..e69de29bb2d1d

From 664e32b4628f5b9ae62f4cb5fe76e6397032fb7e Mon Sep 17 00:00:00 2001
From: Ubugeeei <ubuge1122@gmail.com>
Date: Wed, 22 Nov 2023 22:55:05 +0900
Subject: [PATCH 2/5] remove: dead comment

---
 crates/oxc_js_regex/src/ast.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/oxc_js_regex/src/ast.rs b/crates/oxc_js_regex/src/ast.rs
index a32b08caa3f8e..6690c5f7386fc 100644
--- a/crates/oxc_js_regex/src/ast.rs
+++ b/crates/oxc_js_regex/src/ast.rs
@@ -359,7 +359,7 @@ pub struct Character {
 
 #[derive(Debug)]
 pub enum BackreferenceRef {
-    Number(i32), // FIXME:
+    Number(i32),
     Atom(Atom),
 }
 

From f3981e627cc952e018b9120f0c541ed4359771ad Mon Sep 17 00:00:00 2001
From: Ubugeeei <ubuge1122@gmail.com>
Date: Wed, 22 Nov 2023 23:14:43 +0900
Subject: [PATCH 3/5] remove: unused dependencies

---
 Cargo.lock                     | 2 --
 crates/oxc_js_regex/Cargo.toml | 2 --
 2 files changed, 4 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 3fd4bec9ccb4b..726086bb882df 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1631,9 +1631,7 @@ name = "oxc_js_regex"
 version = "0.0.0"
 dependencies = [
  "oxc_allocator",
- "oxc_index",
  "oxc_span",
- "oxc_syntax",
 ]
 
 [[package]]
diff --git a/crates/oxc_js_regex/Cargo.toml b/crates/oxc_js_regex/Cargo.toml
index 895809c4f7b46..ab3df9dde38be 100644
--- a/crates/oxc_js_regex/Cargo.toml
+++ b/crates/oxc_js_regex/Cargo.toml
@@ -17,5 +17,3 @@ rust-version.workspace = true
 [dependencies]
 oxc_allocator = { workspace = true }
 oxc_span      = { workspace = true }
-oxc_syntax    = { workspace = true }
-oxc_index     = { workspace = true }

From b72b5be477393f7995c036d1ace2b397ef8b67fb Mon Sep 17 00:00:00 2001
From: Ubugeeei <ubuge1122@gmail.com>
Date: Thu, 23 Nov 2023 00:27:02 +0900
Subject: [PATCH 4/5] chore: configure lints and doctest

---
 crates/oxc_js_regex/Cargo.toml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/crates/oxc_js_regex/Cargo.toml b/crates/oxc_js_regex/Cargo.toml
index ab3df9dde38be..ff757bddfb9d2 100644
--- a/crates/oxc_js_regex/Cargo.toml
+++ b/crates/oxc_js_regex/Cargo.toml
@@ -12,7 +12,11 @@ license.workspace      = true
 repository.workspace   = true
 rust-version.workspace = true
 
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+[lints]
+workspace = true
+
+[lib]
+doctest = false
 
 [dependencies]
 oxc_allocator = { workspace = true }

From 35edcb7182438483fbcf3496358a2e2ace737fbc Mon Sep 17 00:00:00 2001
From: Ubugeeei <ubuge1122@gmail.com>
Date: Thu, 23 Nov 2023 00:29:27 +0900
Subject: [PATCH 5/5] chore: oxc_js_regex author

---
 crates/oxc_js_regex/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/oxc_js_regex/Cargo.toml b/crates/oxc_js_regex/Cargo.toml
index ff757bddfb9d2..38dcac20c5f06 100644
--- a/crates/oxc_js_regex/Cargo.toml
+++ b/crates/oxc_js_regex/Cargo.toml
@@ -2,7 +2,7 @@
 name                   = "oxc_js_regex"
 version                = "0.0.0"
 publish                = false
-authors.workspace      = true
+authors                = ["Ubugeeei <ubuge1122@gmail.com>"]
 categories.workspace   = true
 description.workspace  = true
 edition.workspace      = true