From 2a76eb91586e0261c1d7bfb20e1f905029243d6d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9rome=20Eertmans?=
Date: Mon, 19 Feb 2024 12:13:09 +0100
Subject: [PATCH 1/2] fix(lib): correctly match expected tokens

---
 tests/tests/issue_265.rs | 159 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 159 insertions(+)
 create mode 100644 tests/tests/issue_265.rs

diff --git a/tests/tests/issue_265.rs b/tests/tests/issue_265.rs
new file mode 100644
index 00000000..ae6fec07
--- /dev/null
+++ b/tests/tests/issue_265.rs
@@ -0,0 +1,159 @@
+/// Test against issue #265 on GitHub and duplicates.
+use logos_derive::Logos;
+use tests::assert_lex;
+
+mod maltejanz {
+    /// From https://github.com/maciejhirsz/logos/issues/265
+    use super::*;
+
+    #[derive(Logos, Debug, Clone, Copy, PartialEq)]
+    #[allow(non_camel_case_types)]
+    pub enum Token {
+        #[regex(r"[ \t]+", priority = 1)]
+        TK_WHITESPACE = 0,
+        #[regex(r"[a-zA-Z][a-zA-Z0-9]*", priority = 1)]
+        TK_WORD,
+        #[token("not", priority = 50)]
+        TK_NOT,
+        #[token("not in", priority = 60)]
+        TK_NOT_IN,
+    }
+
+    #[test]
+    fn single_not() {
+        assert_lex("not", &[(Ok(Token::TK_NOT), "not", 0..3)]);
+    }
+
+    #[test]
+    fn word_then_not() {
+        assert_lex(
+            "word not",
+            &[
+                (Ok(Token::TK_WORD), "word", 0..4),
+                (Ok(Token::TK_WHITESPACE), " ", 4..5),
+                (Ok(Token::TK_NOT), "not", 5..8),
+            ],
+        );
+    }
+
+    #[test]
+    fn not_then_word() {
+        assert_lex(
+            "not word",
+            &[
+                (Ok(Token::TK_NOT), "not", 0..3),
+                (Ok(Token::TK_WHITESPACE), " ", 3..4),
+                (Ok(Token::TK_WORD), "word", 4..8),
+            ],
+        );
+    }
+
+    #[test]
+    fn not_in() {
+        assert_lex(
+            "not in ",
+            &[
+                (Ok(Token::TK_NOT_IN), "not in", 0..6),
+                (Ok(Token::TK_WHITESPACE), " ", 6..7),
+            ],
+        );
+    }
+}
+
+mod jeertmans {
+    /// From https://github.com/maciejhirsz/logos/issues/279
+    use super::*;
+
+    #[derive(Logos, Debug, Clone, Copy, PartialEq)]
+    enum Token {
+        #[token(r"\")]
+        Backslash,
+        #[token(r"\\")]
+        DoubleBackslash,
+        #[token(r"\begin")]
+        EnvironmentBegin,
+        #[token(r"\end")]
+        EnvironmentEnd,
+        #[token(r"\begin{document}")]
+        DocumentBegin,
+        #[regex(r"\\[a-zA-Z]+")]
+        MacroName,
+    }
+
+    #[test]
+    fn backslash() {
+        assert_lex(
+            r"\+\\+",
+            &[
+                (Ok(Token::Backslash), r"\", 0..1),
+                (Err(()), "+", 1..2),
+                (Ok(Token::DoubleBackslash), r"\\", 2..4),
+                (Err(()), "+", 4..5),
+            ],
+        );
+    }
+
+    #[test]
+    fn double_backslash() {
+        assert_lex(
+            r"\\\",
+            &[
+                (Ok(Token::DoubleBackslash), r"\\", 0..2),
+                (Ok(Token::Backslash), r"\", 2..3),
+            ],
+        );
+    }
+
+    #[test]
+    fn environment_begin() {
+        assert_lex(
+            r"\begin{equation}",
+            &[
+                (Ok(Token::EnvironmentBegin), r"\begin", 0..6),
+            ],
+        );
+    }
+
+    #[test]
+    fn environment_end() {
+        assert_lex(
+            r"\end{equation}",
+            &[
+                (Ok(Token::EnvironmentEnd), r"\end", 0..4),
+            ],
+        );
+    }
+}
+
+mod afreeland {
+    /// From https://github.com/maciejhirsz/logos/issues/377
+    use super::*;
+
+    #[derive(Logos, Debug, Clone, Copy, PartialEq)]
+    enum Token {
+        #[token("alert")]
+        Action,
+        #[token("tls")]
+        Protocol,
+        #[regex(r"([^\s]+) ([^\s]+) (->|<-) ([^\s]+) ([^\s]+)")]
+        NetworkInfo,
+    }
+
+    #[test]
+    fn basic() {
+        assert_lex(
+            "alert tls $HOME_NET any -> $EXTERNAL_NET any (msg:\"some bs\")",
+            &[
+                (Ok(Token::Action), "alert", 0..5),
+                (Err(()), " ", 5..6),
+                (Ok(Token::Protocol), "tls", 6..9),
+                (Err(()), " ", 9..10),
+                (
+                    Ok(Token::NetworkInfo),
+                    "$HOME_NET any -> $EXTERNAL_NET any (msg:\"some bs\")",
+                    10..60,
+                ),
+            ],
+        );
+    }
+}

From 4a09c2143db54f9ebadeded4c063c7910b245fd9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9rome=20Eertmans?=
Date: Tue, 9 Apr 2024 15:42:39 +0200
Subject: [PATCH 2/2] try: patch from @elenakrittik

Reference: https://github.com/elenakrittik/logos/commit/148b6a0144237cfe57886a12a8138ce8d38c8219
---
 logos-codegen/src/generator/context.rs |  6 +-----
 logos-codegen/src/generator/mod.rs     | 13 ++++---------
 tests/tests/issue_265.rs               |  8 ++------
 3 files changed, 7 insertions(+), 20 deletions(-)

diff --git a/logos-codegen/src/generator/context.rs b/logos-codegen/src/generator/context.rs
index 515785ee..c293f9e5 100644
--- a/logos-codegen/src/generator/context.rs
+++ b/logos-codegen/src/generator/context.rs
@@ -26,13 +26,9 @@ pub struct Context {
 }
 
 impl Context {
-    pub fn can_backtrack(&self) -> bool {
-        self.backtrack.is_some()
-    }
-
     pub fn switch(&mut self, miss: Option<NodeId>) -> Option<TokenStream> {
         self.backtrack = Some(miss?);
-        self.bump()
+        None
     }
 
     pub const fn advance(self, n: usize) -> Self {
diff --git a/logos-codegen/src/generator/mod.rs b/logos-codegen/src/generator/mod.rs
index 1b8bf8bf..d18638bc 100644
--- a/logos-codegen/src/generator/mod.rs
+++ b/logos-codegen/src/generator/mod.rs
@@ -112,19 +112,14 @@ impl<'a> Generator<'a> {
             let meta = &self.meta[id];
             let enters_loop = !meta.loop_entry_from.is_empty();
 
-            let bump = if enters_loop || !ctx.can_backtrack() {
-                ctx.switch(self.graph[id].miss())
+            ctx.switch(self.graph[id].miss());
+
+            let bump = if enters_loop || meta.min_read == 0 {
+                ctx.bump()
             } else {
                 None
             };
 
-            let bump = match (bump, enters_loop, meta.min_read) {
-                (Some(t), _, _) => Some(t),
-                (None, true, _) => ctx.bump(),
-                (None, false, 0) => ctx.bump(),
-                (None, false, _) => None,
-            };
-
             if meta.min_read == 0 || ctx.remainder() < meta.min_read {
                 ctx.wipe();
             }
diff --git a/tests/tests/issue_265.rs b/tests/tests/issue_265.rs
index ae6fec07..684109be 100644
--- a/tests/tests/issue_265.rs
+++ b/tests/tests/issue_265.rs
@@ -108,9 +108,7 @@ mod jeertmans {
     fn environment_begin() {
         assert_lex(
             r"\begin{equation}",
-            &[
-                (Ok(Token::EnvironmentBegin), r"\begin", 0..6),
-            ],
+            &[(Ok(Token::EnvironmentBegin), r"\begin", 0..6)],
         );
     }
 
@@ -118,9 +116,7 @@ mod jeertmans {
     fn environment_end() {
         assert_lex(
             r"\end{equation}",
-            &[
-                (Ok(Token::EnvironmentEnd), r"\end", 0..4),
-            ],
+            &[(Ok(Token::EnvironmentEnd), r"\end", 0..4)],
         );
     }
 }