From 221349e4dfb7dce2ba8fc465a034621b5cda67b5 Mon Sep 17 00:00:00 2001 From: Sebastian Kaupper Date: Mon, 20 Jan 2025 09:25:43 +0100 Subject: [PATCH 1/2] Implement greedy name parsing --- vhdl_syntax/src/parser/productions/design.rs | 107 +++- .../src/parser/productions/expression.rs | 20 +- .../src/parser/productions/interface.rs | 102 ++++ vhdl_syntax/src/parser/productions/mod.rs | 1 + vhdl_syntax/src/parser/productions/names.rs | 524 +++++++++++++++++- .../src/parser/productions/scalar_types.rs | 80 +++ .../src/parser/productions/signature.rs | 100 +++- vhdl_syntax/src/parser/util.rs | 93 ++++ vhdl_syntax/src/syntax/node_kind.rs | 28 +- 9 files changed, 1047 insertions(+), 8 deletions(-) create mode 100644 vhdl_syntax/src/parser/productions/scalar_types.rs diff --git a/vhdl_syntax/src/parser/productions/design.rs b/vhdl_syntax/src/parser/productions/design.rs index 23213d5d..cbf4575b 100644 --- a/vhdl_syntax/src/parser/productions/design.rs +++ b/vhdl_syntax/src/parser/productions/design.rs @@ -41,7 +41,39 @@ impl Parser { self.end_node(); } - pub fn context_clause(&mut self) {} + pub fn context_clause(&mut self) { + loop { + match self.tokenizer.peek_next() { + Some(tok) => match tok.kind() { + Keyword(Kw::Use) => self.use_clause(), + Keyword(Kw::Library) => self.library_clause(), + Keyword(Kw::Context) => self.context_reference(), + _ => break, + }, + _ => self.eof_err(), + } + } + } + + pub fn library_clause(&mut self) { + self.start_node(NodeKind::LibraryClause); + self.expect_token(Keyword(Kw::Library)); + self.identifier_list(); + self.expect_token(SemiColon); + self.end_node(); + } + + pub fn use_clause(&mut self) { + self.start_node(NodeKind::UseClause); + self.expect_token(Keyword(Kw::Use)); + self.name_list(); + self.expect_token(SemiColon); + self.end_node(); + } + + pub fn context_reference(&mut self) { + todo!(); + } } #[cfg(test)] @@ -88,6 +120,79 @@ DesignFile Keyword(End) Keyword(Entity) SemiColon +" + ); + } + + #[test] + fn 
parse_entity_with_context_clause() { + let (design, _) = "\ + library ieee; + use ieee.std_logic_1164.all; + + entity my_ent is + begin + end my_ent; + " + .parse_syntax(Parser::design_file); + assert_eq!( + design.test_text(), + "\ +DesignFile + DesignUnit + LibraryClause + Keyword(Library) + IdentifierList + Identifier 'ieee' + SemiColon + UseClause + Keyword(Use) + NameList + Name + Identifier 'ieee' + SelectedName + Dot + Identifier 'std_logic_1164' + SelectedName + Dot + Keyword(All) + SemiColon + EntityDeclaration + Keyword(Entity) + Identifier 'my_ent' + Keyword(Is) + EntityHeader + Keyword(Begin) + Keyword(End) + Identifier 'my_ent' + SemiColon +" + ); + } + + #[test] + fn parse_use_clause() { + let (node, diag) = "use lib1.lib2.lib3.all;".parse_syntax(Parser::use_clause); + assert_eq!(diag.len(), 0); + + assert_eq!( + node.test_text(), + "\ +UseClause + Keyword(Use) + NameList + Name + Identifier 'lib1' + SelectedName + Dot + Identifier 'lib2' + SelectedName + Dot + Identifier 'lib3' + SelectedName + Dot + Keyword(All) + SemiColon " ); } diff --git a/vhdl_syntax/src/parser/productions/expression.rs b/vhdl_syntax/src/parser/productions/expression.rs index a2a913bb..f1f303c5 100644 --- a/vhdl_syntax/src/parser/productions/expression.rs +++ b/vhdl_syntax/src/parser/productions/expression.rs @@ -5,11 +5,29 @@ // Copyright (c) 2025, Lukas Scheller lukasscheller@icloud.com use crate::parser::Parser; +use crate::syntax::node_kind::NodeKind::*; +use crate::tokens::TokenKind::*; use crate::tokens::TokenStream; impl Parser { pub fn expression(&mut self) { - // TODO + self.start_node(Expression); + // TODO: Expecting a simple expression is just a placeholder + self.simple_expression(); + self.end_node(); + } + + pub fn simple_expression(&mut self) { + self.start_node(SimpleExpression); + // TODO: Expecting these literals is just a placeholder + self.expect_one_of_tokens([CharacterLiteral, StringLiteral, Identifier, AbstractLiteral]); + self.end_node(); + } + + pub fn 
expression_list(&mut self) { + self.start_node(ExpressionList); + self.separated_list(Parser::expression, Comma); + self.end_node(); } pub fn condition(&mut self) { diff --git a/vhdl_syntax/src/parser/productions/interface.rs b/vhdl_syntax/src/parser/productions/interface.rs index 85d102e7..8a2f221e 100644 --- a/vhdl_syntax/src/parser/productions/interface.rs +++ b/vhdl_syntax/src/parser/productions/interface.rs @@ -5,6 +5,7 @@ // Copyright (c) 2025, Lukas Scheller lukasscheller@icloud.com use crate::parser::Parser; +use crate::syntax::node_kind::InternalNodeKind; use crate::syntax::node_kind::NodeKind::*; use crate::tokens::Keyword as Kw; use crate::tokens::TokenKind::*; @@ -87,6 +88,56 @@ impl Parser { Keyword(Kw::Linkage), ]); } + + pub fn association_list(&mut self) { + self.start_node(AssociationList); + self.separated_list(Parser::association_element, Comma); + self.end_node(); + } + + pub fn association_element(&mut self) { + self.start_node(AssociationElement); + + let right_arrow_idx = match self.distance_to_closing_paren_or_token(Comma) { + Some(length) => self.lookahead_max_distance(length, [RightArrow]), + None => { + self.eof_err(); + return; + } + }; + + if right_arrow_idx.is_some() { + self.formal_part(); + self.expect_token(RightArrow); + } + self.actual_part(); + + self.end_node(); + } + + pub fn formal_part(&mut self) { + self.start_node(FormalPart); + self.name(); + // Note: `self.name()` will already consume any trailing parenthesized names! 
+ self.end_node(); + } + + pub fn actual_part(&mut self) { + self.start_node(ActualPart); + let length = match self.distance_to_closing_paren_or_token(Comma) { + Some(distance) => distance, + None => { + self.eof_err(); + return; + } + }; + + // TODO: Parsing of `actual_part` would boil down to `name | expression | subtype_indication` + self.start_node(Internal(InternalNodeKind::ActualPartTokens)); + self.skip_n(length); + self.end_node(); + self.end_node(); + } } #[cfg(test)] @@ -94,6 +145,57 @@ mod tests { use crate::parser::test_utils::check; use crate::parser::Parser; + #[test] + fn association_list() { + // Make sure the association list is followed by a closing parenthesis, otherwise parsing will fail + // In reality that shouldn't be a problem, since association lists are always to be enclosed in parenthesis! + check( + Parser::association_list, + "arg1, arg2)", + "\ +AssociationList + AssociationElement + ActualPart + Internal(ActualPartTokens) + Identifier 'arg1' + Comma + AssociationElement + ActualPart + Internal(ActualPartTokens) + Identifier 'arg2' +", + ); + + check( + Parser::association_list, + "p1 => 1, std_ulogic(p2)=> sl_sig)", + "\ +AssociationList + AssociationElement + FormalPart + Name + Identifier 'p1' + RightArrow + ActualPart + Internal(ActualPartTokens) + AbstractLiteral + Comma + AssociationElement + FormalPart + Name + Identifier 'std_ulogic' + Internal(SubtypeIndicationOrExpressionTokens) + LeftPar + Identifier 'p2' + RightPar + RightArrow + ActualPart + Internal(ActualPartTokens) + Identifier 'sl_sig' +", + ); + } + #[test] fn empty_generic_clause() { check( diff --git a/vhdl_syntax/src/parser/productions/mod.rs b/vhdl_syntax/src/parser/productions/mod.rs index e9fc2a74..1c64d637 100644 --- a/vhdl_syntax/src/parser/productions/mod.rs +++ b/vhdl_syntax/src/parser/productions/mod.rs @@ -16,3 +16,4 @@ mod names; mod signature; mod statements; mod subtype; +mod scalar_types; diff --git a/vhdl_syntax/src/parser/productions/names.rs 
b/vhdl_syntax/src/parser/productions/names.rs index 99f583c1..7d2a354e 100644 --- a/vhdl_syntax/src/parser/productions/names.rs +++ b/vhdl_syntax/src/parser/productions/names.rs @@ -5,19 +5,61 @@ // Copyright (c) 2025, Lukas Scheller lukasscheller@icloud.com use crate::parser::Parser; -use crate::syntax::node_kind::NodeKind::{Label, Name}; +use crate::syntax::node_kind::InternalNodeKind::*; +use crate::syntax::node_kind::NodeKind::*; +use crate::tokens::Keyword as Kw; use crate::tokens::TokenKind::*; use crate::tokens::TokenStream; +fn is_start_of_attribute_name(parser: &mut Parser) -> bool { + // Checking for `LeftSquare || Tick` will result in ambiguities with other grammar rules where a signature is possible right after a name. + // Those rules can be `alias_declaration` (LRM §6.6.1) and `subprogram_instantiation_declaration` (LRM §4.4). + // By checking whether the closing square bracket is followed by a `Tick` this ambiguity is resolved + match parser.peek_token() { + Some(Tick) => true, + Some(LeftSquare) => { + let mut idx = 1; + let mut bracket_count = 1; + + while bracket_count > 0 { + match parser.peek_nth_token(idx) { + Some(LeftSquare) => bracket_count += 1, + Some(RightSquare) => bracket_count -= 1, + Some(_) => {} + None => { + return false; + } + } + + idx += 1; + } + + parser.next_nth_is(Tick, idx) + } + Some(_) | None => false, + } +} + impl Parser { pub fn designator(&mut self) { self.expect_one_of_tokens([Identifier, StringLiteral, CharacterLiteral]); } pub fn name(&mut self) { + // (Based on) LRM §8.1 + // The LRM grammar rules for names were transformed to avoid left recursion. + + // In contrast to the LRM, this parsing routine is greedy. Meaning, it will consume trailing parenthesized + // expressions even if the belong to an outer grammar rule! 
self.start_node(Name); - // TODO - self.designator(); + + if self.next_is(LtLt) { + self.external_name(); + } else { + self.designator(); + } + + self.name_tail(); self.end_node(); } @@ -32,4 +74,480 @@ impl Parser { self.end_node(); } } + pub fn name_list(&mut self) { + self.start_node(NameList); + self.separated_list(Parser::name, Comma); + self.end_node(); + } + + fn suffix(&mut self) { + // LRM §8.3 + // suffix ::= identifier | string_literal | character_literal | `all` ; + self.expect_one_of_tokens([ + Identifier, + StringLiteral, + CharacterLiteral, + Keyword(Kw::All), + ]); + } + + fn name_tail(&mut self) { + // name ::= prefix [ name_tail ] ; + // name_tail ::= selected_name | attribute_name | indexed_name | slice_name | function_name ; + // selected_name ::= `.` suffix [ name_tail ] ; + // attribute_name ::= [ signature ] `'` identifier [ `(` expression `)` ] [ name_tail ] ; + // function_name ::= `(` association_list `)` [ name_tail ] ; + // indexed_name ::= `(` expression { `,` expression } `)` [ name_tail ] ; + // slice_name ::= `(` discrete_range `)` [ name_tail ] ; + + if self.next_is(Dot) { + self.start_node(SelectedName); + self.expect_token(Dot); + self.suffix(); + self.end_node(); + self.name_tail(); + } else if self.next_is(LeftPar) { + // Try to differentiate between function calls, indexed names and slices as good as possible: + // 1. An `association_list` can be uniquely identified by searching for a '=>' inside the parenthesis + // 2. When at least a single comma is found, try parse the contents as an expression list + // 3. 
When the `to` or `downto` keyword is found, parse the content as a slice name + // + // If none of these apply, it can be either a `subtype_indication` or a single `expression` + + if self.lookahead_in_parens([RightArrow]).is_some() { + self.start_node(FunctionCallOrIndexedName); + self.expect_token(LeftPar); + self.association_list(); + self.expect_token(RightPar); + self.end_node(); + } else if self.lookahead_in_parens([Comma]).is_some() { + self.start_node(FunctionCallOrIndexedName); + self.expect_token(LeftPar); + self.expression_list(); + self.expect_token(RightPar); + self.end_node(); + } else if self + .lookahead_in_parens([Keyword(Kw::To), Keyword(Kw::Downto)]) + .is_some() + { + self.start_node(SliceName); + self.expect_token(LeftPar); + let closing_paren_distance = self.distance_to_closing_paren(); + self.range(closing_paren_distance.unwrap()); + self.expect_token(RightPar); + self.end_node(); + } else { + // TODO: subtype_indication or expression? + self.start_node(Internal(SubtypeIndicationOrExpressionTokens)); + self.expect_token(LeftPar); + match self.distance_to_closing_paren() { + Some(distance) => self.skip_n(distance), + None => { + self.end_node(); + self.eof_err(); + return; + } + } + self.expect_token(RightPar); + self.end_node(); + } + + self.name_tail(); + } else if is_start_of_attribute_name(self) { + self.start_node(AttributeName); + if self.next_is(LeftSquare) { + self.signature(); + } + self.expect_token(Tick); + + // `range` is a keyword, but may appear as a `attribute_name` + if !self.opt_identifier() { + self.expect_kw(Kw::Range); + } + + if self.next_is(LeftPar) { + self.start_node(ParenthesizedExpression); + self.expect_token(LeftPar); + self.expression(); + self.expect_token(RightPar); + self.end_node(); + } + self.end_node(); + self.name_tail(); + } + } + + pub fn external_name(&mut self) { + // LRM §8.7 + self.start_node(ExternalName); + self.expect_token(LtLt); + + self.expect_one_of_tokens([ + Keyword(Kw::Constant), + 
Keyword(Kw::Signal), + Keyword(Kw::Variable), + ]); + self.external_pathname(); + self.expect_token(Colon); + self.subtype_indication(); + + self.expect_token(GtGt); + self.end_node(); + } + + fn external_pathname(&mut self) { + // LRM §8.7 + self.start_node(ExternalPathName); + match self.peek_token() { + Some(CommAt) => { + self.expect_token(CommAt); + self.identifier(); + self.expect_token(Dot); + self.identifier(); + self.expect_token(Dot); + self.identifier(); + while self.opt_token(Dot) { + self.identifier(); + } + } + Some(Dot) => { + self.expect_token(Dot); + self.partial_pathname(); + } + Some(Circ | Identifier) => { + while self.opt_token(Circ) { + self.expect_token(Dot); + } + self.partial_pathname(); + } + Some(_) => { + self.expect_tokens_err([CommAt, Dot, Circ, Identifier]); + } + None => { + self.eof_err(); + } + } + self.end_node(); + } + + fn partial_pathname(&mut self) { + // LRM §8.7 + // partial_pathname ::= { identifier [ `(` expression `)` ] `.` } identifier ; + self.identifier(); + loop { + if self.next_is(LeftPar) { + self.start_node(ParenthesizedExpression); + self.expect_token(LeftPar); + self.expression(); + self.expect_token(RightPar); + self.end_node(); + self.expect_token(Dot); + } else if !self.opt_token(Dot) { + break; + } + self.identifier(); + } + } +} + +#[cfg(test)] +mod tests { + use crate::parser::{test_utils::check, Parser}; + + #[test] + fn parse_name() { + check( + Parser::name, + "lib1.fn('a', 1, sig).vector(100 downto 10).all", + "\ +Name + Identifier 'lib1' + SelectedName + Dot + Identifier 'fn' + FunctionCallOrIndexedName + LeftPar + ExpressionList + Expression + SimpleExpression + CharacterLiteral ''a'' + Comma + Expression + SimpleExpression + AbstractLiteral + Comma + Expression + SimpleExpression + Identifier 'sig' + RightPar + SelectedName + Dot + Identifier 'vector' + SliceName + LeftPar + Range + SimpleExpression + AbstractLiteral + Keyword(Downto) + SimpleExpression + AbstractLiteral + RightPar + SelectedName + 
Dot + Keyword(All) +", + ); + } + + #[test] + fn parse_external_name() { + check( + Parser::name, + "<< constant @lib.pkg.obj : std_ulogic >>", + "\ +Name + ExternalName + LtLt + Keyword(Constant) + ExternalPathName + CommAt + Identifier 'lib' + Dot + Identifier 'pkg' + Dot + Identifier 'obj' + Colon + Identifier 'std_ulogic' + GtGt +", + ); + + check( + Parser::name, + "<< variable .tb.sig : bit >>", + "\ +Name + ExternalName + LtLt + Keyword(Variable) + ExternalPathName + Dot + Identifier 'tb' + Dot + Identifier 'sig' + Colon + Identifier 'bit' + GtGt +", + ); + + check( + Parser::name, + "<< signal uut.sig : natural >>", + "\ +Name + ExternalName + LtLt + Keyword(Signal) + ExternalPathName + Identifier 'uut' + Dot + Identifier 'sig' + Colon + Identifier 'natural' + GtGt +", + ); + + check( + Parser::name, + "<< signal ^.up1_signal : real >>", + "\ +Name + ExternalName + LtLt + Keyword(Signal) + ExternalPathName + Circ + Dot + Identifier 'up1_signal' + Colon + Identifier 'real' + GtGt +", + ); + + check( + Parser::name, + "<>", + "\ +Name + ExternalName + LtLt + Keyword(Constant) + ExternalPathName + Circ + Dot + Circ + Dot + Circ + Dot + Circ + Dot + Identifier 'up4_signal' + Colon + Identifier 'integer' + GtGt +", + ); + + check( + Parser::name, + "<< constant .tb.uut.gen(1).sig : bit >>", + "\ +Name + ExternalName + LtLt + Keyword(Constant) + ExternalPathName + Dot + Identifier 'tb' + Dot + Identifier 'uut' + Dot + Identifier 'gen' + ParenthesizedExpression + LeftPar + Expression + SimpleExpression + AbstractLiteral + RightPar + Dot + Identifier 'sig' + Colon + Identifier 'bit' + GtGt +", + ); + } + + #[test] + fn parse_selected_name() { + check( + Parser::name, + "lib.pkg_outer.pkg_inner.obj", + "\ +Name + Identifier 'lib' + SelectedName + Dot + Identifier 'pkg_outer' + SelectedName + Dot + Identifier 'pkg_inner' + SelectedName + Dot + Identifier 'obj' +", + ); + + check( + Parser::name, + "pkg.all", + "\ +Name + Identifier 'pkg' + SelectedName + Dot + 
Keyword(All) +", + ); + } + + #[test] + fn parse_attribute_name() { + check( + Parser::name, + "obj'left", + "\ +Name + Identifier 'obj' + AttributeName + Tick + Identifier 'left' +", + ); + + check( + Parser::name, + "slv'range", + "\ +Name + Identifier 'slv' + AttributeName + Tick + Keyword(Range) +", + ); + + check( + Parser::name, + "slv'reverse_range", + "\ +Name + Identifier 'slv' + AttributeName + Tick + Identifier 'reverse_range' +", + ); + + check( + Parser::name, + "integer'image(obj)", + "\ +Name + Identifier 'integer' + AttributeName + Tick + Identifier 'image' + ParenthesizedExpression + LeftPar + Expression + SimpleExpression + Identifier 'obj' + RightPar +", + ); + + check( + Parser::name, + "ieee.numeric_std.to_unsigned[natural, natural return unsigned]'simple_name", + "\ +Name + Identifier 'ieee' + SelectedName + Dot + Identifier 'numeric_std' + SelectedName + Dot + Identifier 'to_unsigned' + AttributeName + Signature + LeftSquare + NameList + Name + Identifier 'natural' + Comma + Name + Identifier 'natural' + Keyword(Return) + Name + Identifier 'unsigned' + RightSquare + Tick + Identifier 'simple_name' +", + ); + } } diff --git a/vhdl_syntax/src/parser/productions/scalar_types.rs b/vhdl_syntax/src/parser/productions/scalar_types.rs new file mode 100644 index 00000000..f4e2b2a6 --- /dev/null +++ b/vhdl_syntax/src/parser/productions/scalar_types.rs @@ -0,0 +1,80 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at http://mozilla.org/MPL/2.0/. 
+// +// Copyright (c) 2024, Lukas Scheller lukasscheller@icloud.com +/// Parsing of scalar types +use crate::parser::Parser; +use crate::syntax::node_kind::NodeKind::*; +use crate::tokens::{Keyword as Kw, TokenKind::*, TokenStream}; + +impl Parser { + pub fn range(&mut self, max_length: usize) { + // LRM §5.2.1 + + // `max_length` should give the distance between the current token position and the end of the range to parse. + // This way the parser can use a bounded lookahead to distinguish between range expressions (using `to` or `downto`) and attribute names. + self.start_node(Range); + + let is_range_expression = self + .lookahead_max_distance(max_length, [Keyword(Kw::To), Keyword(Kw::Downto)]) + .is_some(); + + if is_range_expression { + self.simple_expression(); + self.expect_one_of_tokens([Keyword(Kw::To), Keyword(Kw::Downto)]); + self.simple_expression(); + } else { + self.name(); + } + + self.end_node(); + } +} + +#[cfg(test)] +mod tests { + use crate::parser::{test_utils::check, Parser}; + + #[test] + fn parse_range() { + check( + |parser| Parser::range(parser, usize::MAX), + "100 downto 10", + "\ +Range + SimpleExpression + AbstractLiteral + Keyword(Downto) + SimpleExpression + AbstractLiteral +", + ); + + check( + |parser| Parser::range(parser, usize::MAX), + "0 to 0", + "\ +Range + SimpleExpression + AbstractLiteral + Keyword(To) + SimpleExpression + AbstractLiteral +", + ); + + check( + |parser| Parser::range(parser, usize::MAX), + "slv32_t'range", + "\ +Range + Name + Identifier 'slv32_t' + AttributeName + Tick + Keyword(Range) +", + ); + } +} diff --git a/vhdl_syntax/src/parser/productions/signature.rs b/vhdl_syntax/src/parser/productions/signature.rs index fcb09f7e..d3375571 100644 --- a/vhdl_syntax/src/parser/productions/signature.rs +++ b/vhdl_syntax/src/parser/productions/signature.rs @@ -5,10 +5,106 @@ // Copyright (c) 2025, Lukas Scheller lukasscheller@icloud.com use crate::parser::Parser; +use crate::syntax::node_kind::NodeKind::*; +use 
crate::tokens::token_kind::Keyword as Kw; +use crate::tokens::token_kind::TokenKind::*; use crate::tokens::TokenStream; impl Parser { - pub fn signature(&self) { - unimplemented!() + pub fn signature(&mut self) { + // LRM §4.5.3 + // signature ::= `[` [ name { `,` name } ] [ `return` name ] `]`; + self.start_node(Signature); + self.expect_token(LeftSquare); + + if !self.next_is_one_of([Keyword(Kw::Return), RightSquare]) { + self.name_list(); + } + + if self.opt_token(Keyword(Kw::Return)) { + self.name(); + } + + self.expect_token(RightSquare); + self.end_node(); + } +} + +#[cfg(test)] +mod tests { + use crate::parser::{test_utils::check, Parser}; + + #[test] + fn parse_signature() { + check( + Parser::signature, + "[natural, bit return unsigned]", + "\ +Signature + LeftSquare + NameList + Name + Identifier 'natural' + Comma + Name + Identifier 'bit' + Keyword(Return) + Name + Identifier 'unsigned' + RightSquare +", + ); + + check( + Parser::signature, + "[]", + "\ +Signature + LeftSquare + RightSquare +", + ); + + check( + Parser::signature, + "[return ret_t]", + "\ +Signature + LeftSquare + Keyword(Return) + Name + Identifier 'ret_t' + RightSquare +", + ); + + check( + Parser::signature, + "[arg1_t, arg2_t]", + "\ +Signature + LeftSquare + NameList + Name + Identifier 'arg1_t' + Comma + Name + Identifier 'arg2_t' + RightSquare +", + ); + + check( + Parser::signature, + "[arg1_t]", + "\ +Signature + LeftSquare + NameList + Name + Identifier 'arg1_t' + RightSquare +", + ); } } diff --git a/vhdl_syntax/src/parser/util.rs b/vhdl_syntax/src/parser/util.rs index 5c32f3f3..6a2d01e3 100644 --- a/vhdl_syntax/src/parser/util.rs +++ b/vhdl_syntax/src/parser/util.rs @@ -115,6 +115,13 @@ impl Parser { self.peek_token() == Some(kind) } + pub(crate) fn next_is_one_of(&self, kinds: [TokenKind; N]) -> bool { + match self.peek_token() { + Some(tok) => kinds.contains(&tok), + None => false, + } + } + pub(crate) fn next_nth_is(&self, kind: TokenKind, n: usize) -> bool { 
self.peek_nth_token(n) == Some(kind) } @@ -174,4 +181,90 @@ impl Parser { pub(crate) fn end(self) -> (GreenNode, Vec) { (self.builder.end(), self.diagnostics) } + + fn distance_to_closing_paren_or_token_internal( + &mut self, + kind: Option, + ) -> Option { + let mut idx = 0; + let mut paren_count = 1; + + while paren_count > 0 { + match self.peek_nth_token(idx) { + Some(TokenKind::LeftPar) => paren_count += 1, + Some(TokenKind::RightPar) => paren_count -= 1, + Some(tok) => { + if Some(tok) == kind { + return Some(idx); + } + } + None => return None, + } + idx += 1; + } + + Some(idx - 1) + } + + pub(crate) fn distance_to_closing_paren(&mut self) -> Option { + self.distance_to_closing_paren_or_token_internal(None) + } + + pub(crate) fn distance_to_closing_paren_or_token(&mut self, kind: TokenKind) -> Option { + self.distance_to_closing_paren_or_token_internal(Some(kind)) + } + + pub(crate) fn lookahead_in_parens( + &mut self, + kinds: [TokenKind; N], + ) -> Option<(TokenKind, usize)> { + let mut idx = 0; + let mut paren_count = 0; + let mut paren_found = false; + + while !paren_found || paren_count > 0 { + match self.peek_nth_token(idx) { + Some(TokenKind::LeftPar) => { + paren_count += 1; + paren_found = true; + } + Some(TokenKind::RightPar) => paren_count -= 1, + Some(tok) => { + if paren_count == 1 && kinds.contains(&tok) { + return Some((tok, idx)); + } + } + None => return None, + } + idx += 1; + } + + None + } + + pub(crate) fn lookahead_max_distance( + &mut self, + maximum_distance: usize, + kinds: [TokenKind; N], + ) -> Option<(TokenKind, usize)> { + let mut idx = 0; + let mut paren_count = 0; + + while idx < maximum_distance { + match self.peek_nth_token(idx) { + Some(TokenKind::LeftPar) => paren_count += 1, + Some(TokenKind::RightPar) => paren_count -= 1, + + Some(tok) => { + if paren_count == 0 && kinds.contains(&tok) { + return Some((tok, idx)); + } + } + None => return None, + } + idx += 1; + } + + None + } } diff --git 
a/vhdl_syntax/src/syntax/node_kind.rs b/vhdl_syntax/src/syntax/node_kind.rs index daba6b29..038738a1 100644 --- a/vhdl_syntax/src/syntax/node_kind.rs +++ b/vhdl_syntax/src/syntax/node_kind.rs @@ -4,6 +4,12 @@ // // Copyright (c) 2025, Lukas Scheller lukasscheller@icloud.com +#[derive(PartialEq, Eq, Copy, Clone, Debug)] +pub enum InternalNodeKind { + ActualPartTokens, + SubtypeIndicationOrExpressionTokens, +} + #[derive(PartialEq, Eq, Copy, Clone, Debug)] pub enum NodeKind { AttributeDeclaration, @@ -24,6 +30,9 @@ pub enum NodeKind { DesignUnit, DesignFile, ContextClause, + LibraryClause, + UseClause, + ContextReference, GenericClause, PortClause, InterfaceList, @@ -34,5 +43,22 @@ pub enum NodeKind { EntityDesignator, Label, BlockStatement, - InterfaceObjectDeclaration, // ... + InterfaceObjectDeclaration, + ParenthesizedExpression, + Expression, + SimpleExpression, + ExpressionList, + Range, + SelectedName, + ExternalName, + ExternalPathName, + AttributeName, + FunctionCallOrIndexedName, + SliceName, + Internal(InternalNodeKind), + NameList, + AssociationList, + AssociationElement, + FormalPart, + ActualPart, // ... 
} From 15b2594c4d847c29159ff659c15903a7ace46b85 Mon Sep 17 00:00:00 2001 From: Sebastian Kaupper Date: Wed, 29 Jan 2025 14:05:36 +0100 Subject: [PATCH 2/2] Improve (bounded) lookahead and simplify parsing of ambiguous names --- vhdl_syntax/src/parser/mod.rs | 2 + vhdl_syntax/src/parser/productions/design.rs | 91 ++++++----- .../src/parser/productions/interface.rs | 63 ++++---- vhdl_syntax/src/parser/productions/names.rs | 149 +++++++----------- .../src/parser/productions/scalar_types.rs | 17 +- vhdl_syntax/src/parser/util.rs | 113 ++++++------- vhdl_syntax/src/syntax/node_kind.rs | 8 +- 7 files changed, 185 insertions(+), 258 deletions(-) diff --git a/vhdl_syntax/src/parser/mod.rs b/vhdl_syntax/src/parser/mod.rs index 17398241..01167da0 100644 --- a/vhdl_syntax/src/parser/mod.rs +++ b/vhdl_syntax/src/parser/mod.rs @@ -28,6 +28,7 @@ pub struct Parser { builder: builder::NodeBuilder, diagnostics: Vec, unexpected_eof: bool, + token_index: usize, } impl Parser { @@ -37,6 +38,7 @@ impl Parser { builder: builder::NodeBuilder::new(), diagnostics: Vec::default(), unexpected_eof: false, + token_index: 0, } } diff --git a/vhdl_syntax/src/parser/productions/design.rs b/vhdl_syntax/src/parser/productions/design.rs index cbf4575b..78d8c9b7 100644 --- a/vhdl_syntax/src/parser/productions/design.rs +++ b/vhdl_syntax/src/parser/productions/design.rs @@ -42,6 +42,7 @@ impl Parser { } pub fn context_clause(&mut self) { + self.start_node(NodeKind::ContextClause); loop { match self.tokenizer.peek_next() { Some(tok) => match tok.kind() { @@ -53,11 +54,12 @@ impl Parser { _ => self.eof_err(), } } + self.end_node(); } pub fn library_clause(&mut self) { self.start_node(NodeKind::LibraryClause); - self.expect_token(Keyword(Kw::Library)); + self.expect_kw(Kw::Library); self.identifier_list(); self.expect_token(SemiColon); self.end_node(); @@ -65,7 +67,7 @@ impl Parser { pub fn use_clause(&mut self) { self.start_node(NodeKind::UseClause); - self.expect_token(Keyword(Kw::Use)); + 
self.expect_kw(Kw::Use); self.name_list(); self.expect_token(SemiColon); self.end_node(); @@ -78,29 +80,25 @@ impl Parser { #[cfg(test)] mod tests { - use crate::parser::{CanParse, Parser}; - use crate::tokens; - use crate::tokens::IntoTokenStream; - use pretty_assertions::assert_eq; + use crate::parser::{test_utils::check, Parser}; #[test] fn parse_simple_entity() { - let (entity, _) = tokens! { - entity my_ent is - begin - end my_ent; + check( + Parser::design_file, + "\ +entity my_ent is +begin +end my_ent; - entity my_ent2 is - begin - end entity; - } - .into_token_stream() - .parse_syntax(Parser::design_file); - assert_eq!( - entity.test_text(), +entity my_ent2 is +begin +end entity; +", "\ DesignFile DesignUnit + ContextClause EntityDeclaration Keyword(Entity) Identifier 'my_ent' @@ -111,6 +109,7 @@ DesignFile Identifier 'my_ent' SemiColon DesignUnit + ContextClause EntityDeclaration Keyword(Entity) Identifier 'my_ent2' @@ -120,43 +119,43 @@ DesignFile Keyword(End) Keyword(Entity) SemiColon -" +", ); } #[test] fn parse_entity_with_context_clause() { - let (design, _) = "\ + check( + Parser::design_file, + "\ library ieee; use ieee.std_logic_1164.all; entity my_ent is begin end my_ent; - " - .parse_syntax(Parser::design_file); - assert_eq!( - design.test_text(), + ", "\ DesignFile DesignUnit - LibraryClause - Keyword(Library) - IdentifierList - Identifier 'ieee' - SemiColon - UseClause - Keyword(Use) - NameList - Name + ContextClause + LibraryClause + Keyword(Library) + IdentifierList Identifier 'ieee' - SelectedName - Dot - Identifier 'std_logic_1164' - SelectedName - Dot - Keyword(All) - SemiColon + SemiColon + UseClause + Keyword(Use) + NameList + Name + Identifier 'ieee' + SelectedName + Dot + Identifier 'std_logic_1164' + SelectedName + Dot + Keyword(All) + SemiColon EntityDeclaration Keyword(Entity) Identifier 'my_ent' @@ -166,17 +165,15 @@ DesignFile Keyword(End) Identifier 'my_ent' SemiColon -" +", ); } #[test] fn parse_use_clause() { - let (node, 
diag) = "use lib1.lib2.lib3.all;".parse_syntax(Parser::use_clause); - assert_eq!(diag.len(), 0); - - assert_eq!( - node.test_text(), + check( + Parser::use_clause, + "use lib1.lib2.lib3.all;", "\ UseClause Keyword(Use) @@ -193,7 +190,7 @@ UseClause Dot Keyword(All) SemiColon -" +", ); } } diff --git a/vhdl_syntax/src/parser/productions/interface.rs b/vhdl_syntax/src/parser/productions/interface.rs index 8a2f221e..9cb069a8 100644 --- a/vhdl_syntax/src/parser/productions/interface.rs +++ b/vhdl_syntax/src/parser/productions/interface.rs @@ -5,7 +5,6 @@ // Copyright (c) 2025, Lukas Scheller lukasscheller@icloud.com use crate::parser::Parser; -use crate::syntax::node_kind::InternalNodeKind; use crate::syntax::node_kind::NodeKind::*; use crate::tokens::Keyword as Kw; use crate::tokens::TokenKind::*; @@ -90,27 +89,33 @@ impl Parser { } pub fn association_list(&mut self) { + self.association_list_bounded(usize::MAX); + } + fn association_list_bounded(&mut self, max_index: usize) { self.start_node(AssociationList); - self.separated_list(Parser::association_element, Comma); + self.separated_list( + |parser| { + let end_of_element_idx = + match parser.lookahead_max_token_index(max_index, [Comma, RightPar]) { + Ok((_, idx)) => idx, + Err(idx) => idx, + }; + parser.association_element_bounded(end_of_element_idx); + }, + Comma, + ); self.end_node(); } - pub fn association_element(&mut self) { + fn association_element_bounded(&mut self, max_index: usize) { self.start_node(AssociationElement); - let right_arrow_idx = match self.distance_to_closing_paren_or_token(Comma) { - Some(length) => self.lookahead_max_distance(length, [RightArrow]), - None => { - self.eof_err(); - return; - } - }; - - if right_arrow_idx.is_some() { + // TODO: Error handling is done at a bare minimum. 
+ if let Ok(_) = self.lookahead_max_token_index(max_index, [RightArrow]) { self.formal_part(); self.expect_token(RightArrow); } - self.actual_part(); + self.actual_part_bounded(max_index); self.end_node(); } @@ -122,19 +127,11 @@ impl Parser { self.end_node(); } - pub fn actual_part(&mut self) { + fn actual_part_bounded(&mut self, max_index: usize) { self.start_node(ActualPart); - let length = match self.distance_to_closing_paren_or_token(Comma) { - Some(distance) => distance, - None => { - self.eof_err(); - return; - } - }; - - // TODO: Parsing of `actual_part` would boil down to `name | expression | subtype_indication` - self.start_node(Internal(InternalNodeKind::ActualPartTokens)); - self.skip_n(length); + // Parsing of `actual_part` would boil down to `name | expression | subtype_indication` + self.start_node(RawTokens); + self.skip_to(max_index); self.end_node(); self.end_node(); } @@ -147,28 +144,26 @@ mod tests { #[test] fn association_list() { - // Make sure the association list is followed by a closing parenthesis, otherwise parsing will fail - // In reality that shouldn't be a problem, since association lists are always to be enclosed in parenthesis! 
check( Parser::association_list, - "arg1, arg2)", + "arg1, arg2", "\ AssociationList AssociationElement ActualPart - Internal(ActualPartTokens) + RawTokens Identifier 'arg1' Comma AssociationElement ActualPart - Internal(ActualPartTokens) + RawTokens Identifier 'arg2' ", ); check( Parser::association_list, - "p1 => 1, std_ulogic(p2)=> sl_sig)", + "p1 => 1, std_ulogic(p2)=> sl_sig", "\ AssociationList AssociationElement @@ -177,20 +172,20 @@ AssociationList Identifier 'p1' RightArrow ActualPart - Internal(ActualPartTokens) + RawTokens AbstractLiteral Comma AssociationElement FormalPart Name Identifier 'std_ulogic' - Internal(SubtypeIndicationOrExpressionTokens) + RawTokens LeftPar Identifier 'p2' RightPar RightArrow ActualPart - Internal(ActualPartTokens) + RawTokens Identifier 'sl_sig' ", ); diff --git a/vhdl_syntax/src/parser/productions/names.rs b/vhdl_syntax/src/parser/productions/names.rs index 7d2a354e..8009054c 100644 --- a/vhdl_syntax/src/parser/productions/names.rs +++ b/vhdl_syntax/src/parser/productions/names.rs @@ -5,7 +5,6 @@ // Copyright (c) 2025, Lukas Scheller lukasscheller@icloud.com use crate::parser::Parser; -use crate::syntax::node_kind::InternalNodeKind::*; use crate::syntax::node_kind::NodeKind::*; use crate::tokens::Keyword as Kw; use crate::tokens::TokenKind::*; @@ -41,10 +40,6 @@ fn is_start_of_attribute_name(parser: &mut Parser) -> bool { } impl Parser { - pub fn designator(&mut self) { - self.expect_one_of_tokens([Identifier, StringLiteral, CharacterLiteral]); - } - pub fn name(&mut self) { // (Based on) LRM §8.1 // The LRM grammar rules for names were transformed to avoid left recursion. 
@@ -56,7 +51,7 @@ impl Parser { if self.next_is(LtLt) { self.external_name(); } else { - self.designator(); + self.expect_one_of_tokens([Identifier, StringLiteral, CharacterLiteral]); } self.name_tail(); @@ -67,14 +62,19 @@ impl Parser { self.name() } - pub fn opt_label(&mut self) { + pub(crate) fn designator(&mut self) { + // TODO: That designator is not fully LRM compliant + self.expect_one_of_tokens([Identifier, StringLiteral, CharacterLiteral]); + } + + pub(crate) fn opt_label(&mut self) { if self.next_is(Identifier) && self.next_nth_is(Colon, 1) { self.start_node(Label); self.skip_n(2); self.end_node(); } } - pub fn name_list(&mut self) { + pub(crate) fn name_list(&mut self) { self.start_node(NameList); self.separated_list(Parser::name, Comma); self.end_node(); @@ -107,50 +107,24 @@ impl Parser { self.end_node(); self.name_tail(); } else if self.next_is(LeftPar) { - // Try to differentiate between function calls, indexed names and slices as good as possible: - // 1. An `association_list` can be uniquely identified by searching for a '=>' inside the parenthesis - // 2. When at least a single comma is found, try parse the contents as an expression list - // 3. 
When the `to` or `downto` keyword is found, parse the content as a slice name - // - // If none of these apply, it can be either a `subtype_indication` or a single `expression` - - if self.lookahead_in_parens([RightArrow]).is_some() { - self.start_node(FunctionCallOrIndexedName); - self.expect_token(LeftPar); - self.association_list(); - self.expect_token(RightPar); - self.end_node(); - } else if self.lookahead_in_parens([Comma]).is_some() { - self.start_node(FunctionCallOrIndexedName); - self.expect_token(LeftPar); - self.expression_list(); - self.expect_token(RightPar); - self.end_node(); - } else if self - .lookahead_in_parens([Keyword(Kw::To), Keyword(Kw::Downto)]) - .is_some() - { - self.start_node(SliceName); - self.expect_token(LeftPar); - let closing_paren_distance = self.distance_to_closing_paren(); - self.range(closing_paren_distance.unwrap()); - self.expect_token(RightPar); - self.end_node(); - } else { - // TODO: subtype_indication or expression? - self.start_node(Internal(SubtypeIndicationOrExpressionTokens)); - self.expect_token(LeftPar); - match self.distance_to_closing_paren() { - Some(distance) => self.skip_n(distance), - None => { - self.end_node(); - self.eof_err(); - return; - } + // Instead of trying to differentiate between `subtype_indication`, `association_list`, a list of `expression`s and a `discrete_range`, + // put all tokens inside the parentheses in a `RawTokens` node. + self.start_node(RawTokens); + self.expect_token(LeftPar); + match self.lookahead([RightPar]) { + Ok((_, end_index)) => { + self.skip_to(end_index); + } + Err(_) => { + // TODO: The parenthesized expression is not terminated correctly + // Find some way to handle this gracefully!
+ self.eof_err(); + self.end_node(); + return; } - self.expect_token(RightPar); - self.end_node(); } + self.expect_token(RightPar); + self.end_node(); self.name_tail(); } else if is_start_of_attribute_name(self) { @@ -160,7 +134,7 @@ impl Parser { } self.expect_token(Tick); - // `range` is a keyword, but may appear as a `attribute_name` + // `range` is a keyword, but may appear as an `attribute_name` if !self.opt_identifier() { self.expect_kw(Kw::Range); } @@ -198,35 +172,28 @@ impl Parser { fn external_pathname(&mut self) { // LRM §8.7 self.start_node(ExternalPathName); - match self.peek_token() { - Some(CommAt) => { - self.expect_token(CommAt); - self.identifier(); - self.expect_token(Dot); - self.identifier(); - self.expect_token(Dot); + match_next_token!(self, + CommAt => { + self.expect_token(CommAt); + self.identifier(); + self.expect_token(Dot); + self.identifier(); + self.expect_token(Dot); + self.identifier(); + while self.opt_token(Dot) { self.identifier(); - while self.opt_token(Dot) { - self.identifier(); - } } - Some(Dot) => { + }, + Dot => { + self.expect_token(Dot); + self.partial_pathname(); + }, + Circ, Identifier => { + while self.opt_token(Circ) { self.expect_token(Dot); - self.partial_pathname(); } - Some(Circ | Identifier) => { - while self.opt_token(Circ) { - self.expect_token(Dot); - } - self.partial_pathname(); - } - Some(_) => { - self.expect_tokens_err([CommAt, Dot, Circ, Identifier]); - } - None => { - self.eof_err(); - } - } + self.partial_pathname(); + }); self.end_node(); } @@ -265,32 +232,22 @@ Name SelectedName Dot Identifier 'fn' - FunctionCallOrIndexedName + RawTokens LeftPar - ExpressionList - Expression - SimpleExpression - CharacterLiteral ''a'' - Comma - Expression - SimpleExpression - AbstractLiteral - Comma - Expression - SimpleExpression - Identifier 'sig' + CharacterLiteral ''a'' + Comma + AbstractLiteral + Comma + Identifier 'sig' RightPar SelectedName Dot Identifier 'vector' - SliceName + RawTokens LeftPar - Range - 
SimpleExpression - AbstractLiteral - Keyword(Downto) - SimpleExpression - AbstractLiteral + AbstractLiteral + Keyword(Downto) + AbstractLiteral RightPar SelectedName Dot diff --git a/vhdl_syntax/src/parser/productions/scalar_types.rs b/vhdl_syntax/src/parser/productions/scalar_types.rs index f4e2b2a6..99965f06 100644 --- a/vhdl_syntax/src/parser/productions/scalar_types.rs +++ b/vhdl_syntax/src/parser/productions/scalar_types.rs @@ -9,16 +9,19 @@ use crate::syntax::node_kind::NodeKind::*; use crate::tokens::{Keyword as Kw, TokenKind::*, TokenStream}; impl Parser { - pub fn range(&mut self, max_length: usize) { + pub fn range(&mut self) { + self.range_bounded(usize::MAX); + } + fn range_bounded(&mut self, max_index: usize) { // LRM §5.2.1 - // `max_length` should give the distance between the current token position and the end of the range to parse. + // `max_index` should point to the end of the range to parse (exclusive). // This way the parser can use a bounded lookahead to distinguish between range expressions (using `to` or `downto`) and attribute names. 
self.start_node(Range); let is_range_expression = self - .lookahead_max_distance(max_length, [Keyword(Kw::To), Keyword(Kw::Downto)]) - .is_some(); + .lookahead_max_token_index(max_index, [Keyword(Kw::To), Keyword(Kw::Downto)]) + .is_ok(); if is_range_expression { self.simple_expression(); @@ -39,7 +42,7 @@ mod tests { #[test] fn parse_range() { check( - |parser| Parser::range(parser, usize::MAX), + Parser::range, "100 downto 10", "\ Range @@ -52,7 +55,7 @@ Range ); check( - |parser| Parser::range(parser, usize::MAX), + Parser::range, "0 to 0", "\ Range @@ -65,7 +68,7 @@ Range ); check( - |parser| Parser::range(parser, usize::MAX), + Parser::range, "slv32_t'range", "\ Range diff --git a/vhdl_syntax/src/parser/util.rs b/vhdl_syntax/src/parser/util.rs index 6a2d01e3..425e5cae 100644 --- a/vhdl_syntax/src/parser/util.rs +++ b/vhdl_syntax/src/parser/util.rs @@ -67,18 +67,28 @@ impl Parser { pub(crate) fn skip(&mut self) { if let Some(token) = self.tokenizer.next() { self.builder.push(token); + self.token_index += 1; } } pub(crate) fn skip_n(&mut self, n: usize) { for _ in 0..n { - self.skip() + self.skip(); + if self.peek_token().is_none() { + break; + } } } + pub(crate) fn skip_to(&mut self, token_index: usize) { + assert!(token_index > self.token_index); + self.skip_n(token_index - self.token_index); + } + pub(crate) fn expect_token(&mut self, kind: TokenKind) { if let Some(token) = self.tokenizer.next_if(|token| token.kind() == kind) { self.builder.push(token); + self.token_index += 1; return; } // TODO: what are possible recovery strategies? 
@@ -133,6 +143,7 @@ impl Parser { pub(crate) fn opt_token(&mut self, kind: TokenKind) -> bool { if let Some(token) = self.tokenizer.next_if(|token| token.kind() == kind) { self.builder.push(token); + self.token_index += 1; true } else { false @@ -149,6 +160,7 @@ impl Parser { { let kind = token.kind(); self.builder.push(token); + self.token_index += 1; Some(kind) } else { None @@ -182,89 +194,56 @@ impl Parser { (self.builder.end(), self.diagnostics) } - fn distance_to_closing_paren_or_token_internal( + pub(crate) fn lookahead<const N: usize>( &mut self, - kind: Option<TokenKind>, - ) -> Option<usize> { - let mut idx = 0; - let mut paren_count = 1; - - while paren_count > 0 { - match self.peek_nth_token(idx) { - Some(TokenKind::LeftPar) => paren_count += 1, - Some(TokenKind::RightPar) => paren_count -= 1, - Some(tok) => { - if Some(tok) == kind { - return Some(idx); - } - } - None => return None, - } - idx += 1; - } - - Some(idx - 1) - } - - pub(crate) fn distance_to_closing_paren(&mut self) -> Option<usize> { - self.distance_to_closing_paren_or_token_internal(None) - } - - pub(crate) fn distance_to_closing_paren_or_token(&mut self, kind: TokenKind) -> Option<usize> { - self.distance_to_closing_paren_or_token_internal(Some(kind)) + kinds: [TokenKind; N], + ) -> Result<(TokenKind, usize), usize> { + self.lookahead_max_token_index(usize::MAX, kinds) } - pub(crate) fn lookahead_in_parens<const N: usize>( + /// Look ahead in the current token stream until one of the given `TokenKind`s is found. + /// In case of success, the matching `TokenKind` is returned, as well as the token index it was found at. + /// In case of an error (EOF or a nesting error), the index at which the lookahead ended is returned. + /// + /// TODO: For better error handling you probably will need a way to differentiate between EOF and nesting errors!
+ pub(crate) fn lookahead_max_token_index<const N: usize>( &mut self, + maximum_index: usize, kinds: [TokenKind; N], - ) -> Option<(TokenKind, usize)> { - let mut idx = 0; + ) -> Result<(TokenKind, usize), usize> { + let mut length = 0; let mut paren_count = 0; - let mut paren_found = false; - while !paren_found || paren_count > 0 { - match self.peek_nth_token(idx) { - Some(TokenKind::LeftPar) => { - paren_count += 1; - paren_found = true; - } - Some(TokenKind::RightPar) => paren_count -= 1, - Some(tok) => { - if paren_count == 1 && kinds.contains(&tok) { - return Some((tok, idx)); + while self.token_index + length <= maximum_index && paren_count >= 0 { + match self.peek_nth_token(length) { + Some(TokenKind::LeftPar) => paren_count += 1, + Some(TokenKind::RightPar) => { + // Allow the closing parenthesis to match as well + if paren_count == 0 && kinds.contains(&TokenKind::RightPar) { + return Ok((TokenKind::RightPar, self.token_index + length)); } - } - None => return None, - } - idx += 1; - } - None - } + paren_count -= 1; - pub(crate) fn lookahead_max_distance<const N: usize>( - &mut self, - maximum_distance: usize, - kinds: [TokenKind; N], - ) -> Option<(TokenKind, usize)> { - let mut idx = 0; - let mut paren_count = 0; - - while idx < maximum_distance { - match self.peek_nth_token(idx) { - Some(TokenKind::LeftPar) => paren_count += 1, - Some(TokenKind::RightPar) => paren_count -= 1, + // A closing parenthesis indicates that some form of + // grouping ended that was not started during this lookahead. + if paren_count < 0 { + return Err(self.token_index + length); + } + } Some(tok) => { + // To avoid matching tokens in some (potentially recursive) sub expression of some sort, + // only check the current token if we are at the outermost grouping layer (`paren_count == 0`).
if paren_count == 0 && kinds.contains(&tok) { - return Some((tok, idx)); + return Ok((tok, self.token_index + length)); } } - None => return None, + None => return Err(self.token_index + length), } - idx += 1; + length += 1; } - None + Err(self.token_index + length) } } diff --git a/vhdl_syntax/src/syntax/node_kind.rs b/vhdl_syntax/src/syntax/node_kind.rs index 038738a1..2cd568c2 100644 --- a/vhdl_syntax/src/syntax/node_kind.rs +++ b/vhdl_syntax/src/syntax/node_kind.rs @@ -4,12 +4,6 @@ // // Copyright (c) 2025, Lukas Scheller lukasscheller@icloud.com -#[derive(PartialEq, Eq, Copy, Clone, Debug)] -pub enum InternalNodeKind { - ActualPartTokens, - SubtypeIndicationOrExpressionTokens, -} - #[derive(PartialEq, Eq, Copy, Clone, Debug)] pub enum NodeKind { AttributeDeclaration, @@ -55,7 +49,7 @@ pub enum NodeKind { AttributeName, FunctionCallOrIndexedName, SliceName, - Internal(InternalNodeKind), + RawTokens, NameList, AssociationList, AssociationElement,