Skip to content

Commit

Permalink
Add support for case insensitivity (#198)
Browse files Browse the repository at this point in the history
* Create the `IgnoreFlags` structure

* Add the `IgnoreFlags` structure
* Parsing logic of `IgnoreFlags`

* Add `ApplieIgnoreFlags` trait

* Add `ApplieIgnoreFlags` trait
* Implementation of that trait for a whole lot of structures
* Updated `lib.rs` to take the ignore flags into account

* Add tests for `ignore(case/ascii_case)`

* Remove the `ApplieIgnoreFlags` trait

* Remove the `ApplieIgnoreFlags` trait
-> Replaced with the `MakeAsciiCaseInsensitive` trait, which serves the same purpose but only for the `IgnoreAsciiCase` flag.

* Add ignore flags support for `to_mir`

* `Literal::to_mir` now takes ignore flags and parses mir following those flags.
* Add `Mir::utf8_ignore_case` and `Mir::binary_ignore_case`
* Add `Literal::escape_regex`

* Add a simple test for escaped regex
  • Loading branch information
gymore-io authored Feb 1, 2021
1 parent aaa6572 commit 4181591
Show file tree
Hide file tree
Showing 6 changed files with 820 additions and 17 deletions.
47 changes: 35 additions & 12 deletions logos-derive/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,14 +132,35 @@ pub fn logos(input: TokenStream) -> TokenStream {
}
};

let bytes = definition.literal.to_bytes();
let then = graph.push(
leaf(definition.literal.span())
.priority(definition.priority.unwrap_or(bytes.len() * 2))
.callback(definition.callback),
);

ropes.push(Rope::new(bytes, then));
if definition.ignore_flags.is_empty() {
let bytes = definition.literal.to_bytes();
let then = graph.push(
leaf(definition.literal.span())
.priority(definition.priority.unwrap_or(bytes.len() * 2))
.callback(definition.callback),
);

ropes.push(Rope::new(bytes, then));
} else {
let mir = definition
.literal
.escape_regex()
.to_mir(
&Default::default(),
definition.ignore_flags,
&mut parser.errors,
)
.expect("The literal should be perfectly valid regex");

let then = graph.push(
leaf(definition.literal.span())
.priority(definition.priority.unwrap_or_else(|| mir.priority()))
.callback(definition.callback),
);
let id = graph.regex(mir, then);

regex_ids.push(id);
}
}
"regex" => {
let definition = match parser.parse_definition(attr) {
Expand All @@ -149,16 +170,18 @@ pub fn logos(input: TokenStream) -> TokenStream {
continue;
}
};
let mir = match definition
.literal
.to_mir(&parser.subpatterns, &mut parser.errors)
{
let mir = match definition.literal.to_mir(
&parser.subpatterns,
definition.ignore_flags,
&mut parser.errors,
) {
Ok(mir) => mir,
Err(err) => {
parser.err(err, definition.literal.span());
continue;
}
};

let then = graph.push(
leaf(definition.literal.span())
.priority(definition.priority.unwrap_or_else(|| mir.priority()))
Expand Down
20 changes: 20 additions & 0 deletions logos-derive/src/mir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,15 @@ impl Mir {
Mir::try_from(ParserBuilder::new().build().parse(source)?)
}

/// Parses `source` as a regex with case-insensitive matching switched on and
/// converts the parsed expression into a `Mir`.
///
/// # Errors
///
/// Returns an error if `source` fails to parse, or if the parsed expression
/// cannot be converted by `Mir::try_from`.
pub fn utf8_ignore_case(source: &str) -> Result<Mir> {
    let parsed = ParserBuilder::new()
        .case_insensitive(true)
        .build()
        .parse(source)?;

    Mir::try_from(parsed)
}

pub fn binary(source: &str) -> Result<Mir> {
Mir::try_from(
ParserBuilder::new()
Expand All @@ -37,6 +46,17 @@ impl Mir {
)
}

/// Parses `source` as a byte-oriented regex (invalid UTF-8 allowed, Unicode
/// mode off) with case-insensitive matching switched on, and converts the
/// parsed expression into a `Mir`.
///
/// # Errors
///
/// Returns an error if `source` fails to parse, or if the parsed expression
/// cannot be converted by `Mir::try_from`.
pub fn binary_ignore_case(source: &str) -> Result<Mir> {
    // Configure and build the parser first, then run it as a separate step.
    let mut parser = ParserBuilder::new()
        .allow_invalid_utf8(true)
        .unicode(false)
        .case_insensitive(true)
        .build();
    let parsed = parser.parse(source)?;

    Mir::try_from(parsed)
}

pub fn priority(&self) -> usize {
match self {
Mir::Empty | Mir::Loop(_) | Mir::Maybe(_) => 0,
Expand Down
53 changes: 48 additions & 5 deletions logos-derive/src/parser/definition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,15 @@ use crate::error::{Errors, Result};
use crate::leaf::Callback;
use crate::mir::Mir;
use crate::parser::nested::NestedValue;
use crate::parser::{Parser, Subpatterns};
use crate::parser::{IgnoreFlags, Parser, Subpatterns};

use super::ignore_flags::ascii_case::MakeAsciiCaseInsensitive;

/// A parsed attribute definition: the pattern literal together with the
/// optional settings that may accompany it.
pub struct Definition {
    /// The string or byte-string literal the definition was declared with.
    pub literal: Literal,
    /// Explicitly requested priority, or `None` when none was given.
    pub priority: Option<usize>,
    /// Callback attached to the definition, if any.
    pub callback: Option<Callback>,
    /// Case-insensitivity flags, filled in from an `ignore(...)` group.
    pub ignore_flags: IgnoreFlags,
}

pub enum Literal {
Expand All @@ -24,6 +27,7 @@ impl Definition {
literal,
priority: None,
callback: None,
ignore_flags: IgnoreFlags::Empty,
}
}

Expand Down Expand Up @@ -67,6 +71,12 @@ impl Definition {
("callback", _) => {
parser.err("Expected: callback = ...", name.span());
}
("ignore", NestedValue::Group(tokens)) => {
self.ignore_flags.parse_group(name, tokens, parser);
}
("ignore", _) => {
parser.err("Expected: ignore(<flag>, ...)", name.span());
}
(unknown, _) => {
parser.err(
format!(
Expand All @@ -92,11 +102,44 @@ impl Literal {
}
}

pub fn to_mir(&self, subpatterns: &Subpatterns, errors: &mut Errors) -> Result<Mir> {
let value = subpatterns.fix(self, errors);
/// Returns a new `Literal` of the same variant whose contents have been passed
/// through `regex_syntax::escape`, keeping the span of `self`.
///
/// Byte-string literals are first turned into a regex string with
/// `bytes_to_regex_string` and the escaped text is stored back as bytes.
pub fn escape_regex(&self) -> Literal {
match self {
Literal::Utf8(_) => Mir::utf8(&value),
Literal::Bytes(_) => Mir::binary(&value),
Literal::Utf8(string) => Literal::Utf8(LitStr::new(
regex_syntax::escape(&string.value()).as_str(),
self.span(),
)),
Literal::Bytes(bytes) => Literal::Bytes(LitByteStr::new(
regex_syntax::escape(&bytes_to_regex_string(bytes.value())).as_bytes(),
self.span(),
)),
}
}

/// Lowers this literal into a `Mir`, honouring the requested ignore flags.
///
/// The literal is first run through `subpatterns.fix`, then parsed:
///
/// * `IgnoreAsciiCase` set: UTF-8 literals are parsed normally and then made
///   ASCII case-insensitive; byte literals use `Mir::binary_ignore_case`.
/// * otherwise `IgnoreCase` set: UTF-8 literals use `Mir::utf8_ignore_case`;
///   byte literals use `Mir::binary_ignore_case`.
/// * no flag set: `Mir::utf8` / `Mir::binary`.
///
/// `IgnoreAsciiCase` takes precedence when both flags are present.
///
/// # Errors
///
/// Returns whatever error the selected `Mir` constructor produces.
pub fn to_mir(
    &self,
    subpatterns: &Subpatterns,
    ignore_flags: IgnoreFlags,
    errors: &mut Errors,
) -> Result<Mir> {
    let pattern = subpatterns.fix(self, errors);

    // The `||` short-circuits, so `IgnoreCase` is only consulted when the
    // ASCII-only flag is absent.
    let ascii_only = ignore_flags.contains(IgnoreFlags::IgnoreAsciiCase);
    let any_case = ascii_only || ignore_flags.contains(IgnoreFlags::IgnoreCase);

    match self {
        Literal::Utf8(_) if ascii_only => {
            Mir::utf8(&pattern).map(MakeAsciiCaseInsensitive::make_ascii_case_insensitive)
        }
        Literal::Utf8(_) if any_case => Mir::utf8_ignore_case(&pattern),
        Literal::Utf8(_) => Mir::utf8(&pattern),
        Literal::Bytes(_) if any_case => Mir::binary_ignore_case(&pattern),
        Literal::Bytes(_) => Mir::binary(&pattern),
    }
}

Expand Down
Loading

0 comments on commit 4181591

Please sign in to comment.