Skip to content

Commit

Permalink
Fix intersection branching (#50)
Browse files Browse the repository at this point in the history
  • Loading branch information
maciejhirsz authored Dec 10, 2018
1 parent 2eaf761 commit 06f58a0
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 43 deletions.
2 changes: 1 addition & 1 deletion logos-derive/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "logos-derive"
version = "0.9.2"
version = "0.9.3"
authors = ["maciejhirsz <[email protected]>"]
license = "MIT/Apache-2.0"
description = "Create ridiculously fast Lexers"
Expand Down
68 changes: 28 additions & 40 deletions logos-derive/src/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,16 @@ impl<'a> Branch<'a> {
Some(Node::Branch(self))
}
}

/// Reports whether this branch counts as "finite".
///
/// A branch with no `then` continuation is never finite. Otherwise it is
/// finite unless the continuation is a `Node::Fork` whose kind is anything
/// other than `ForkKind::Plain`.
fn is_finite(&self) -> bool {
    // No continuation at all: not finite.
    let next = match self.then {
        Some(ref boxed) => &**boxed,
        None => return false,
    };

    // Only a fork can make the branch non-finite, and only when it is not plain.
    match *next {
        Node::Fork(ref fork) => fork.kind == ForkKind::Plain,
        _ => true,
    }
}
}

impl<'a> Fork<'a> {
Expand Down Expand Up @@ -277,30 +287,24 @@ impl<'a> Fork<'a> {
return self.insert_then(branch.then);
}

// FIXME!
//
// This is kind of a hack that prevents us from creating intersections for
// identifiers all the way down, blowing up the stack!
if self.is_finite() {
// Looking for intersection prefixes, that is: A ≠ B & (A ⊂ B | B ⊂ A)
for other in self.arms.iter_mut() {
if let Some(prefix) = branch.regex.common_prefix(&other.regex) {
let mut intersection = Branch::new(Regex::from(prefix));

let mut a = branch.clone();
let mut b = other.clone();

a.regex.unshift();
b.regex.unshift();

intersection.insert_then(a.to_node().map(Box::new));
intersection.insert_then(b.to_node().map(Box::new));

if intersection.regex.first() == branch.regex.first() {
branch = intersection;
} else {
mem::swap(other, &mut intersection);
}
// Looking for intersection prefixes, that is: A ≠ B & (A ⊂ B | B ⊂ A)
for other in self.arms.iter_mut().filter(|arm| arm.is_finite()) {
if let Some(prefix) = branch.regex.common_prefix(&other.regex) {
let mut intersection = Branch::new(Regex::from(prefix));

let mut a = branch.clone();
let mut b = other.clone();

a.regex.unshift();
b.regex.unshift();

intersection.insert_then(a.to_node().map(Box::new));
intersection.insert_then(b.to_node().map(Box::new));

if intersection.regex.first() == branch.regex.first() {
branch = intersection;
} else {
mem::swap(other, &mut intersection);
}
}
}
Expand Down Expand Up @@ -451,22 +455,6 @@ impl<'a> Fork<'a> {
}
}
}

/// Reports whether this fork counts as "finite".
///
/// * `ForkKind::Plain` forks are always finite.
/// * `ForkKind::Repeat` forks are never finite.
/// * `ForkKind::Maybe` forks are finite only if every arm has a `then`
///   continuation that is not a non-plain fork.
fn is_finite(&self) -> bool {
    match self.kind {
        ForkKind::Plain => true,
        ForkKind::Repeat => false,
        ForkKind::Maybe => {
            // Bail out on the first arm that fails the check.
            for arm in self.arms.iter() {
                let arm_is_finite = match arm.then {
                    None => false,
                    Some(ref next) => match **next {
                        Node::Fork(ref fork) => fork.kind == ForkKind::Plain,
                        _ => true,
                    },
                };

                if !arm_is_finite {
                    return false;
                }
            }

            true
        }
    }
}
}

impl<'a> Node<'a> {
Expand Down
22 changes: 21 additions & 1 deletion tests/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ static IDENTIFIERS: &str = "It was the year when they finally immanentized the E
fn identifiers(b: &mut Bencher) {
use logos::Logos;


b.bytes = IDENTIFIERS.len() as u64;

b.iter(|| {
Expand All @@ -132,6 +131,27 @@ fn identifiers(b: &mut Bencher) {
});
}

/// Benchmarks lexing `IDENTIFIERS` from a nul-terminated copy of the text.
///
/// The copy is allocated once in a `toolshed::Arena` outside the timed
/// closure; each iteration builds a fresh lexer over it and advances until
/// `Token::EndOfProgram` is reached.
#[bench]
fn identifiers_nul_terminated(b: &mut Bencher) {
    use logos::Logos;
    use toolshed::Arena;

    // Report throughput relative to the size of the input text.
    b.bytes = IDENTIFIERS.len() as u64;

    let arena = Arena::new();
    let source = arena.alloc_nul_term_str(IDENTIFIERS);

    b.iter(|| {
        let mut lexer = Token::lexer(source);

        loop {
            if lexer.token == Token::EndOfProgram {
                break;
            }

            lexer.advance();
        }

        // Hand the final token back to the bench harness as the closure's result.
        lexer.token
    });
}

#[bench]
fn logos(b: &mut Bencher) {
use logos::Logos;
Expand Down
2 changes: 1 addition & 1 deletion tests/tests/advanced.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ enum Token {
LiteralUrbitAddress,

#[regex="~[0-9]+-?[\\.0-9a-f]+"]
LiteralAbsDate,
LiteralAbsDate,

#[regex="~[mhs][0-9]+"]
LiteralRelDate,
Expand Down

0 comments on commit 06f58a0

Please sign in to comment.