Skip to content

Commit

Permalink
refactor(parser): extracted reused regexes into separate module with …
Browse files Browse the repository at this point in the history
…lazy_static! macro
  • Loading branch information
AurumTheEnd committed Mar 8, 2024
1 parent 326bb8b commit 27f7d09
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 13 deletions.
6 changes: 4 additions & 2 deletions src/parser/structs/intermediate_token.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use itertools::Itertools;
use regex::{Regex, RegexSet};
use regex::RegexSet;

use crate::parser::utils::LITERAL_IDENTIFIER;

#[derive(PartialEq, Debug)]
pub enum IntermediateToken<'a> {
Expand Down Expand Up @@ -126,7 +128,7 @@ impl<'a> IntermediateToken<'a> {
format!(
r"(?i)^{}{}",

Check warning on line 129 in src/parser/structs/intermediate_token.rs

View check run for this annotation

Codecov / codecov/patch

src/parser/structs/intermediate_token.rs#L129

Added line #L129 was not covered by tests
regex::escape(pattern),
if Regex::new(r"[-_a-zA-Z0-9]+").unwrap().is_match(pattern) {
if LITERAL_IDENTIFIER.is_match(pattern) {
"([^-_a-zA-Z0-9]|$)"
} else {
""
Expand Down
15 changes: 4 additions & 11 deletions src/parser/tokenize.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use std::str::Chars;

use itertools::{Itertools, MultiPeek};
use regex::Regex;

use crate::parser::error::TokenizeError;
use crate::parser::error::TokenizeError::MissingClosingParenthesis;
use crate::parser::structs::{FinalToken, IntermediateToken};
use crate::parser::utils::SHOULD_END_LITERAL;
use crate::parser::utils::{peek_until_n, pop_n_left, trim_whitespace_left};

pub fn tokenize(input: &str) -> Result<Vec<FinalToken>, TokenizeError> {
Expand All @@ -20,17 +20,14 @@ fn tokenize_level(
let mut buffer = String::new();
let take_size = IntermediateToken::longest_token_len() + 1;

// TODO make regex lazy-static
let should_end_literal = Regex::new(r"[^-_a-zA-Z0-9]").unwrap();

    // trim whitespace in case of whitespace after opening parenthesis
trim_whitespace_left(input);

while peek_until_n(take_size, input, &mut buffer) || !buffer.is_empty() {
let intermediate_token = IntermediateToken::try_from(buffer.as_str());

match intermediate_token {
None => consume_while_literal(input, &mut result, &should_end_literal),
None => consume_while_literal(input, &mut result),
Some(token) => {
let (final_token, pattern_length) = match token {
IntermediateToken::And { pattern } => {
Expand Down Expand Up @@ -130,16 +127,12 @@ fn consume_until_brace(
Ok((FinalToken::Literal(literal_buffer), 0))
}

fn consume_while_literal(
input: &mut MultiPeek<Chars>,
result: &mut Vec<FinalToken>,
should_end_literal: &Regex,
) {
fn consume_while_literal(input: &mut MultiPeek<Chars>, result: &mut Vec<FinalToken>) {
let mut literal_buffer: String = String::new();
input.reset_peek();

while let Some(c) = input.peek() {
if should_end_literal.is_match(&c.to_string()) {
if SHOULD_END_LITERAL.is_match(&c.to_string()) {
break;
}

Expand Down
2 changes: 2 additions & 0 deletions src/parser/utils/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
pub use peek_until::peek_until_n;
pub use pop::pop_n_left;
pub use regex::{LITERAL_IDENTIFIER, SHOULD_END_LITERAL};
pub use trim_whitespace::trim_whitespace_left;

mod peek_until;
mod pop;
mod regex;
mod trim_whitespace;
6 changes: 6 additions & 0 deletions src/parser/utils/regex.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
//! Lazily-compiled regexes shared across the parser, built once on first use
//! via `lazy_static!` instead of being recompiled at every call site.

use regex::Regex;

lazy_static::lazy_static! {
// Matches any single character that is NOT valid inside a literal
// identifier (letters, digits, '-', '_'); the tokenizer uses it to
// detect where a literal token ends.
pub static ref SHOULD_END_LITERAL: Regex = Regex::new(r"[^-_a-zA-Z0-9]").unwrap();
// Matches a run of identifier characters. Note it is unanchored, so
// `is_match` reports whether the input *contains* any such run;
// used when deciding if an operator pattern needs a boundary check.
pub static ref LITERAL_IDENTIFIER: Regex = Regex::new(r"[-_a-zA-Z0-9]+").unwrap();
}

0 comments on commit 27f7d09

Please sign in to comment.