From 8e092ad24c374963678e5d98f18edc5d82ab141a Mon Sep 17 00:00:00 2001 From: y21 <30553356+y21@users.noreply.github.com> Date: Mon, 25 Dec 2023 20:56:38 +0100 Subject: [PATCH] delay regex flag parsing --- Cargo.lock | 10 +++++++--- crates/dash_lexer/Cargo.toml | 1 - crates/dash_lexer/src/lib.rs | 15 +++++++-------- crates/dash_middle/src/lexer/token.rs | 2 +- crates/dash_parser/src/expr.rs | 11 +++++++++-- crates/dash_regex/src/error.rs | 5 +++++ crates/dash_regex/src/flags.rs | 4 +++- 7 files changed, 32 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 78144f5d..e3d11c50 100755 --- a/Cargo.lock +++ b/Cargo.lock @@ -111,9 +111,12 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.3.3" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +dependencies = [ + "serde", +] [[package]] name = "bitvec" @@ -497,6 +500,7 @@ dependencies = [ name = "dash_regex" version = "0.1.0" dependencies = [ + "bitflags 2.4.1", "serde", "smallvec", "thiserror", @@ -1626,7 +1630,7 @@ version = "0.38.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5" dependencies = [ - "bitflags 2.3.3", + "bitflags 2.4.1", "errno", "libc", "linux-raw-sys", diff --git a/crates/dash_lexer/Cargo.toml b/crates/dash_lexer/Cargo.toml index f011c10e..44fea8e1 100644 --- a/crates/dash_lexer/Cargo.toml +++ b/crates/dash_lexer/Cargo.toml @@ -8,4 +8,3 @@ edition = "2021" [dependencies] either = "1.6.1" dash_middle = { path = "../dash_middle" } -dash_regex = { path = "../dash_regex" } diff --git a/crates/dash_lexer/src/lib.rs b/crates/dash_lexer/src/lib.rs index 84fe7e3a..8fb20f6c 100644 --- a/crates/dash_lexer/src/lib.rs +++ 
b/crates/dash_lexer/src/lib.rs @@ -1,12 +1,11 @@ use std::borrow::Cow; use std::ops::Range; -use dash_middle::interner::{StringInterner, Symbol}; +use dash_middle::interner::{sym, StringInterner, Symbol}; use dash_middle::lexer::token::{as_token, Token, TokenType}; use dash_middle::parser::error::Error; use dash_middle::sourcemap::Span; use dash_middle::util; -use dash_regex::flags::Flags; /// A JavaScript source code lexer #[derive(Debug)] @@ -400,7 +399,7 @@ impl<'a, 'interner> Lexer<'a, 'interner> { } /// Assumes one character has already been read. - fn read_identifier_raw(&mut self) -> &'a str { + fn read_identifier_raw(&mut self) -> Symbol { let start = self.idx - 1; while !self.is_eof() { let cur = self.current_real(); @@ -412,13 +411,13 @@ impl<'a, 'interner> Lexer<'a, 'interner> { self.advance(); } - self.subslice(start..self.idx) + let slice = self.subslice(start..self.idx); + self.interner.intern(slice) } /// Reads an identifier and returns it as a node fn read_identifier(&mut self) { - let ident = self.read_identifier_raw(); - let sym = self.interner.intern(ident); + let sym = self.read_identifier_raw(); self.create_contextified_token(as_token(sym)); } @@ -440,9 +439,9 @@ impl<'a, 'interner> Lexer<'a, 'interner> { let flags = if self.current().is_some_and(util::is_alpha) { self.advance(); // identifier reading requires one character to be read - self.read_identifier_raw().parse::<Flags>().unwrap() // TODO: handle error + self.read_identifier_raw() } else { - Flags::empty() + sym::EMPTY }; self.create_contextified_token(TokenType::RegexLiteral { diff --git a/crates/dash_middle/src/lexer/token.rs b/crates/dash_middle/src/lexer/token.rs index cffc2997..abc8f4b5 100644 --- a/crates/dash_middle/src/lexer/token.rs +++ b/crates/dash_middle/src/lexer/token.rs @@ -204,7 +204,7 @@ pub enum TokenType { /// Regex literal: /a+b/g #[display(fmt = "")] - RegexLiteral { literal: Symbol, flags: Flags }, + RegexLiteral { literal: Symbol, flags: Symbol }, #[display(fmt = "0x")] 
NumberHex(Symbol), diff --git a/crates/dash_parser/src/expr.rs b/crates/dash_parser/src/expr.rs index db7abccc..c3c4b29d 100644 --- a/crates/dash_parser/src/expr.rs +++ b/crates/dash_parser/src/expr.rs @@ -6,6 +6,7 @@ use dash_middle::parser::statement::{ BlockStatement, FunctionDeclaration, FunctionKind, Parameter, ReturnStatement, Statement, StatementKind, }; use dash_middle::sourcemap::Span; +use dash_regex::Flags; use crate::Parser; @@ -692,8 +693,14 @@ impl<'a, 'interner> Parser<'a, 'interner> { // Trim / prefix and suffix let full = self.interner.resolve(literal); let full = &full[1..full.len() - 1]; - let nodes = match dash_regex::Parser::new(full.as_bytes()).parse_all() { - Ok(nodes) => nodes, + let (nodes, flags) = match dash_regex::Parser::new(full.as_bytes()).parse_all().and_then(|node| { + self.interner + .resolve(flags) + .parse::<Flags>() + .map_err(Into::into) + .map(|flags| (node, flags)) + }) { + Ok((nodes, flags)) => (nodes, flags), Err(err) => { let tok = self.previous().unwrap().clone(); self.create_error(Error::RegexSyntaxError(tok, err)); diff --git a/crates/dash_regex/src/error.rs b/crates/dash_regex/src/error.rs index 2475d873..1a3d127b 100644 --- a/crates/dash_regex/src/error.rs +++ b/crates/dash_regex/src/error.rs @@ -1,5 +1,7 @@ use thiserror::Error; +use crate::flags; + #[derive(Error, Debug)] pub enum Error { #[error("unexpected end of file")] @@ -7,4 +9,7 @@ pub enum Error { #[error("unexpected character: {}", *.0 as char)] UnexpectedChar(u8), + + #[error("{0}")] + Flags(#[from] flags::Error), } diff --git a/crates/dash_regex/src/flags.rs b/crates/dash_regex/src/flags.rs index 568317a6..6b17cad2 100644 --- a/crates/dash_regex/src/flags.rs +++ b/crates/dash_regex/src/flags.rs @@ -2,6 +2,7 @@ use std::str::FromStr; use bitflags::bitflags; use serde::{Deserialize, Serialize}; +use thiserror::Error; bitflags! { #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] @@ -12,8 +13,9 @@ bitflags! 
{ } } -#[derive(Debug)] +#[derive(Debug, Error)] pub enum Error { + #[error("unknown flag: {0}")] UnknownFlag(char), }