diff --git a/book/src/attributes/logos.md b/book/src/attributes/logos.md index 38475df9..fc2f828b 100644 --- a/book/src/attributes/logos.md +++ b/book/src/attributes/logos.md @@ -9,6 +9,7 @@ The syntax is as follows: ```rust,no_run,no_playground #[derive(Logos)] #[logos(skip "regex literal")] +#[logos(skip("regex literal"[, callback, priority = <integer>]))] #[logos(extras = ExtrasType)] #[logos(error = ErrorType)] #[logos(crate = path::to::logos)] diff --git a/book/src/callbacks.md b/book/src/callbacks.md index 01a3ffab..469cfde5 100644 --- a/book/src/callbacks.md +++ b/book/src/callbacks.md @@ -67,3 +67,5 @@ Callbacks can be also used to do perform more specialized lexing in place where regular expressions are too limiting. For specifics look at [`Lexer::remainder`](https://docs.rs/logos/latest/logos/struct.Lexer.html#method.remainder) and [`Lexer::bump`](https://docs.rs/logos/latest/logos/struct.Lexer.html#method.bump). + +Callbacks can also be used with `#[logos(skip)]`, in which case the callback should return `Skip` or `()`. \ No newline at end of file diff --git a/examples/extras.rs b/examples/extras.rs index eb1bdb9a..e39613b8 100644 --- a/examples/extras.rs +++ b/examples/extras.rs @@ -49,10 +49,8 @@ fn word_callback(lex: &mut Lexer) -> (usize, usize) { /// Simple tokens to retrieve words and their location. 
#[derive(Debug, Logos)] #[logos(extras = (usize, usize))] +#[logos(skip(r"\n", newline_callback))] enum Token { - #[regex(r"\n", newline_callback)] - Newline, - #[regex(r"\w+", word_callback)] Word((usize, usize)), } diff --git a/logos-codegen/src/generator/leaf.rs b/logos-codegen/src/generator/leaf.rs index 04a6e983..9ee7fb6e 100644 --- a/logos-codegen/src/generator/leaf.rs +++ b/logos-codegen/src/generator/leaf.rs @@ -3,6 +3,7 @@ use quote::quote; use crate::generator::{Context, Generator}; use crate::leaf::{Callback, Leaf}; +use crate::parser::SkipCallback; use crate::util::MaybeVoid; impl<'a> Generator<'a> { @@ -45,6 +46,45 @@ impl<'a> Generator<'a> { callback(lex).construct(#constructor, lex); } } + Some(Callback::SkipCallback(SkipCallback::Label(label))) => { + quote! { + #bump + + trait SkipReturn {} + impl SkipReturn for () {} + impl SkipReturn for Skip {} + + fn callback(lex: &mut Lexer) -> impl SkipReturn { + #label(lex) + } + + callback(lex); + + lex.trivia(); + #name::lex(lex); + } + } + Some(Callback::SkipCallback(SkipCallback::Inline(inline))) => { + let arg = &inline.arg; + let body = &inline.body; + + quote! { + #bump + + trait SkipReturn {} + impl SkipReturn for () {} + impl SkipReturn for Skip {} + + fn callback(#arg: &mut Lexer) -> impl SkipReturn { + #body + } + + callback(lex); + + lex.trivia(); + #name::lex(lex); + } + } Some(Callback::Skip(_)) => { quote! 
{ #bump diff --git a/logos-codegen/src/leaf.rs b/logos-codegen/src/leaf.rs index 5e0b810e..3428796d 100644 --- a/logos-codegen/src/leaf.rs +++ b/logos-codegen/src/leaf.rs @@ -5,6 +5,7 @@ use proc_macro2::{Span, TokenStream}; use syn::{spanned::Spanned, Ident}; use crate::graph::{Disambiguate, Node}; +use crate::parser::SkipCallback; use crate::util::MaybeVoid; #[derive(Clone)] @@ -20,6 +21,8 @@ pub struct Leaf<'t> { pub enum Callback { Label(TokenStream), Inline(Box), + #[allow(clippy::enum_variant_names)] + SkipCallback(SkipCallback), Skip(Span), } @@ -41,7 +44,8 @@ impl Callback { match self { Callback::Label(tokens) => tokens.span(), Callback::Inline(inline) => inline.span, - Callback::Skip(span) => *span, + Callback::SkipCallback(callback) => callback.span(), + Callback::Skip(skip) => *skip, } } } @@ -103,6 +107,7 @@ impl Debug for Leaf<'_> { Some(Callback::Label(ref label)) => write!(f, " ({})", label), Some(Callback::Inline(_)) => f.write_str(" ()"), Some(Callback::Skip(_)) => f.write_str(" ()"), + Some(Callback::SkipCallback(_)) => f.write_str("()"), None => Ok(()), } } diff --git a/logos-codegen/src/lib.rs b/logos-codegen/src/lib.rs index 2b2d3db2..6ad57274 100644 --- a/logos-codegen/src/lib.rs +++ b/logos-codegen/src/lib.rs @@ -69,16 +69,23 @@ pub fn generate(input: TokenStream) -> TokenStream { { let errors = &mut parser.errors; - for literal in &parser.skips { - match literal.to_mir(&parser.subpatterns, IgnoreFlags::Empty, errors) { + for mut skip in parser.skips.drain(..) 
{ + match skip + .literal + .to_mir(&parser.subpatterns, IgnoreFlags::Empty, errors) + { Ok(mir) => { - let then = graph.push(Leaf::new_skip(literal.span()).priority(mir.priority())); + let then = graph.push( + Leaf::new_skip(skip.literal.span()) + .priority(skip.priority.take().unwrap_or_else(|| mir.priority())) + .callback(Some(skip.into_callback())), + ); let id = graph.regex(mir, then); regex_ids.push(id); } Err(err) => { - errors.err(err, literal.span()); + errors.err(err, skip.literal.span()); } } } diff --git a/logos-codegen/src/parser/mod.rs b/logos-codegen/src/parser/mod.rs index 3ad7202e..34e11a77 100644 --- a/logos-codegen/src/parser/mod.rs +++ b/logos-codegen/src/parser/mod.rs @@ -12,12 +12,14 @@ use crate::LOGOS_ATTR; mod definition; mod ignore_flags; mod nested; +mod skip; mod subpattern; mod type_params; pub use self::definition::{Definition, Literal}; pub use self::ignore_flags::IgnoreFlags; use self::nested::{AttributeParser, Nested, NestedValue}; +pub use self::skip::{Skip, SkipCallback}; pub use self::subpattern::Subpatterns; use self::type_params::{replace_lifetime, traverse_type, TypeParams}; @@ -26,7 +28,7 @@ pub struct Parser { pub errors: Errors, pub mode: Mode, pub source: Option, - pub skips: Vec, + pub skips: Vec, pub extras: MaybeVoid, pub error_type: MaybeVoid, pub subpatterns: Subpatterns, @@ -135,7 +137,18 @@ impl Parser { ("skip", |parser, span, value| match value { NestedValue::Literal(lit) => { if let Some(literal) = parser.parse_literal(Lit::new(lit)) { - parser.skips.push(literal); + parser.skips.push(Skip::new(literal)); + } + } + NestedValue::Group(tokens) => { + let token_span = tokens.span(); + if let Some(skip) = parser.parse_skip(tokens) { + parser.skips.push(skip); + } else { + parser.err( + "Expected #[logos(skip(...))] or #[logos(skip \"regex literal\")]", + token_span, + ); } } _ => { @@ -192,6 +205,48 @@ impl Parser { } } + pub fn parse_skip(&mut self, stream: TokenStream) -> Option { + // We don't call parse_attr here 
because we only want to parse what is inside the parentheses + let mut nested = AttributeParser::new(stream); + + let literal = match nested.parsed::()? { + Ok(lit) => self.parse_literal(lit)?, + Err(err) => { + self.err(err.to_string(), err.span()); + + return None; + } + }; + + let mut skip = Skip::new(literal); + + for (position, next) in nested.enumerate() { + match next { + Nested::Unexpected(tokens) => { + self.err("Unexpected token in attribute", tokens.span()); + } + Nested::Unnamed(tokens) => match position { + 0 => skip.callback = self.parse_skip_callback(tokens), + _ => { + self.err( + "\ + Expected a named argument at this position\n\ + \n\ + hint: If you are trying to define a callback here use: callback = ...\ + ", + tokens.span(), + ); + } + }, + Nested::Named(name, value) => { + skip.named_attr(name, value, self); + } + } + } + + Some(skip) + } + pub fn parse_literal(&mut self, lit: Lit) -> Option { match lit { Lit::Str(string) => Some(Literal::Utf8(string)), @@ -298,6 +353,23 @@ impl Parser { Some(inline.into()) } + fn parse_skip_callback(&mut self, tokens: TokenStream) -> Option { + let span = tokens.span(); + Some(match self.parse_callback(tokens) { + Some(Callback::Inline(inline)) => SkipCallback::Inline(inline), + Some(Callback::Label(label)) => SkipCallback::Label(label), + Some(Callback::Skip(_)) => { + // Probably not reachable + return None; + } + Some(Callback::SkipCallback(cb)) => cb, + None => { + self.err("Not a valid callback", span); + return None; + } + }) + } + /// Checks if `ty` is a declared generic param, if so replaces it /// with a concrete type defined using #[logos(type T = Type)] /// diff --git a/logos-codegen/src/parser/skip.rs b/logos-codegen/src/parser/skip.rs new file mode 100644 index 00000000..7ed7f687 --- /dev/null +++ b/logos-codegen/src/parser/skip.rs @@ -0,0 +1,100 @@ +use proc_macro2::{Ident, Span, TokenStream}; +use syn::spanned::Spanned; + +use crate::leaf::{Callback, InlineCallback}; +use 
crate::parser::nested::NestedValue; +use crate::parser::{Literal, Parser}; + +pub struct Skip { + pub literal: Literal, + pub callback: Option, + pub priority: Option, +} + +#[derive(Clone)] +pub enum SkipCallback { + Label(TokenStream), + Inline(Box), +} + +impl Skip { + pub fn new(literal: Literal) -> Self { + Self { + literal, + callback: None, + priority: None, + } + } + + pub fn named_attr(&mut self, name: Ident, value: NestedValue, parser: &mut Parser) { + match (name.to_string().as_str(), value) { + ("priority", NestedValue::Assign(tokens)) => { + let prio = match tokens.to_string().parse() { + Ok(prio) => prio, + Err(_) => { + parser.err("Expected an unsigned integer", tokens.span()); + return; + } + }; + + if self.priority.replace(prio).is_some() { + parser.err("Resetting previously set priority", tokens.span()); + } + } + ("priority", _) => { + parser.err("Expected: priority = ", name.span()); + } + ("callback", NestedValue::Assign(tokens)) => { + let span = tokens.span(); + let callback = match parser.parse_skip_callback(tokens) { + Some(callback) => callback, + None => { + parser.err("Not a valid callback", span); + return; + } + }; + + if let Some(previous) = self.callback.replace(callback) { + parser + .err( + "Callback has been already set", + span.join(name.span()).unwrap(), + ) + .err("Previous callback set here", previous.span()); + } + } + ("callback", _) => { + parser.err("Expected: callback = ...", name.span()); + } + (unknown, _) => { + parser.err( + format!( + "\ + Unknown nested attribute: {}\n\ + \n\ + Expected: callback\ + ", + unknown + ), + name.span(), + ); + } + } + } + + pub fn into_callback(self) -> Callback { + match self.callback { + Some(callback) => Callback::SkipCallback(callback), + None => Callback::Skip(self.literal.span()), + } + } +} + +impl SkipCallback { + pub fn span(&self) -> Span { + match self { + Self::Label(label) => label.span(), + Self::Inline(inline) => inline.span, + } + } +} diff --git 
a/tests/tests/callbacks.rs b/tests/tests/callbacks.rs index 2b143d7d..74deef5f 100644 --- a/tests/tests/callbacks.rs +++ b/tests/tests/callbacks.rs @@ -216,3 +216,85 @@ mod return_result_skip { assert_eq!(lexer.next(), Some(Err(LexerError::UnterminatedComment))); } } + +mod skip_callback_function { + use super::*; + + #[derive(Logos, Debug, PartialEq)] + #[logos(skip r"[ \t\n\f]+")] + #[logos(skip("") + .map(|id| id + 3) + .unwrap_or(lexer.remainder().len()); + lexer.bump(end); + } + + #[test] + fn skip_callback_function() { + let mut lexer = Token::lexer(" "); + assert_eq!(lexer.next(), Some(Ok(Token::Tag("foo")))); + assert_eq!(lexer.next(), Some(Ok(Token::Tag("bar")))); + assert_eq!(lexer.next(), None); + + let mut lexer = Token::lexer("