diff --git a/vhdl_syntax/src/parser/mod.rs b/vhdl_syntax/src/parser/mod.rs
index 17398241..01167da0 100644
--- a/vhdl_syntax/src/parser/mod.rs
+++ b/vhdl_syntax/src/parser/mod.rs
@@ -28,6 +28,7 @@ pub struct Parser {
     builder: builder::NodeBuilder,
     diagnostics: Vec,
     unexpected_eof: bool,
+    token_index: usize,
 }
 
 impl Parser {
@@ -37,6 +38,7 @@ impl Parser {
             builder: builder::NodeBuilder::new(),
             diagnostics: Vec::default(),
             unexpected_eof: false,
+            token_index: 0,
         }
     }
 
diff --git a/vhdl_syntax/src/parser/productions/design.rs b/vhdl_syntax/src/parser/productions/design.rs
index 23213d5d..78d8c9b7 100644
--- a/vhdl_syntax/src/parser/productions/design.rs
+++ b/vhdl_syntax/src/parser/productions/design.rs
@@ -41,34 +41,64 @@ impl Parser {
         self.end_node();
     }
 
-    pub fn context_clause(&mut self) {}
+    /// Parses a possibly empty run of `library`, `use` and `context` items into a `ContextClause` node.
+    pub fn context_clause(&mut self) {
+        self.start_node(NodeKind::ContextClause);
+        loop {
+            match self.tokenizer.peek_next().map(|tok| tok.kind()) {
+                Some(Keyword(Kw::Use)) => self.use_clause(),
+                Some(Keyword(Kw::Library)) => self.library_clause(),
+                Some(Keyword(Kw::Context)) => self.context_reference(),
+                Some(_) => break,
+                // `eof_err` returns normally: report EOF once, then leave the loop or it never ends.
+                None => break self.eof_err(),
+            }
+        }
+        self.end_node();
+    }
+
+    pub fn library_clause(&mut self) {
+        self.start_node(NodeKind::LibraryClause);
+        self.expect_kw(Kw::Library);
+        self.identifier_list();
+        self.expect_token(SemiColon);
+        self.end_node();
+    }
+
+    pub fn use_clause(&mut self) {
+        self.start_node(NodeKind::UseClause);
+        self.expect_kw(Kw::Use);
+        self.name_list();
+        self.expect_token(SemiColon);
+        self.end_node();
+    }
+
+    pub fn context_reference(&mut self) {
+        todo!();
+    }
 }
 
 #[cfg(test)]
 mod tests {
-    use crate::parser::{CanParse, Parser};
-    use crate::tokens;
-    use crate::tokens::IntoTokenStream;
-    use pretty_assertions::assert_eq;
+    use crate::parser::{test_utils::check, Parser};
 
     #[test]
     fn parse_simple_entity() {
-        let (entity, _) = tokens! 
{ - entity my_ent is - begin - end my_ent; + check( + Parser::design_file, + "\ +entity my_ent is +begin +end my_ent; - entity my_ent2 is - begin - end entity; - } - .into_token_stream() - .parse_syntax(Parser::design_file); - assert_eq!( - entity.test_text(), +entity my_ent2 is +begin +end entity; +", "\ DesignFile DesignUnit + ContextClause EntityDeclaration Keyword(Entity) Identifier 'my_ent' @@ -79,6 +109,7 @@ DesignFile Identifier 'my_ent' SemiColon DesignUnit + ContextClause EntityDeclaration Keyword(Entity) Identifier 'my_ent2' @@ -88,7 +119,78 @@ DesignFile Keyword(End) Keyword(Entity) SemiColon -" +", + ); + } + + #[test] + fn parse_entity_with_context_clause() { + check( + Parser::design_file, + "\ + library ieee; + use ieee.std_logic_1164.all; + + entity my_ent is + begin + end my_ent; + ", + "\ +DesignFile + DesignUnit + ContextClause + LibraryClause + Keyword(Library) + IdentifierList + Identifier 'ieee' + SemiColon + UseClause + Keyword(Use) + NameList + Name + Identifier 'ieee' + SelectedName + Dot + Identifier 'std_logic_1164' + SelectedName + Dot + Keyword(All) + SemiColon + EntityDeclaration + Keyword(Entity) + Identifier 'my_ent' + Keyword(Is) + EntityHeader + Keyword(Begin) + Keyword(End) + Identifier 'my_ent' + SemiColon +", + ); + } + + #[test] + fn parse_use_clause() { + check( + Parser::use_clause, + "use lib1.lib2.lib3.all;", + "\ +UseClause + Keyword(Use) + NameList + Name + Identifier 'lib1' + SelectedName + Dot + Identifier 'lib2' + SelectedName + Dot + Identifier 'lib3' + SelectedName + Dot + Keyword(All) + SemiColon +", ); } } diff --git a/vhdl_syntax/src/parser/productions/expression.rs b/vhdl_syntax/src/parser/productions/expression.rs index a2a913bb..f1f303c5 100644 --- a/vhdl_syntax/src/parser/productions/expression.rs +++ b/vhdl_syntax/src/parser/productions/expression.rs @@ -5,11 +5,29 @@ // Copyright (c) 2025, Lukas Scheller lukasscheller@icloud.com use crate::parser::Parser; +use crate::syntax::node_kind::NodeKind::*; +use 
crate::tokens::TokenKind::*; use crate::tokens::TokenStream; impl Parser { pub fn expression(&mut self) { - // TODO + self.start_node(Expression); + // TODO: Expecting a simple expression is just a placeholder + self.simple_expression(); + self.end_node(); + } + + pub fn simple_expression(&mut self) { + self.start_node(SimpleExpression); + // TODO: Expecting these literals is just a placeholder + self.expect_one_of_tokens([CharacterLiteral, StringLiteral, Identifier, AbstractLiteral]); + self.end_node(); + } + + pub fn expression_list(&mut self) { + self.start_node(ExpressionList); + self.separated_list(Parser::expression, Comma); + self.end_node(); } pub fn condition(&mut self) { diff --git a/vhdl_syntax/src/parser/productions/interface.rs b/vhdl_syntax/src/parser/productions/interface.rs index 85d102e7..9cb069a8 100644 --- a/vhdl_syntax/src/parser/productions/interface.rs +++ b/vhdl_syntax/src/parser/productions/interface.rs @@ -87,6 +87,54 @@ impl Parser { Keyword(Kw::Linkage), ]); } + + pub fn association_list(&mut self) { + self.association_list_bounded(usize::MAX); + } + fn association_list_bounded(&mut self, max_index: usize) { + self.start_node(AssociationList); + self.separated_list( + |parser| { + let end_of_element_idx = + match parser.lookahead_max_token_index(max_index, [Comma, RightPar]) { + Ok((_, idx)) => idx, + Err(idx) => idx, + }; + parser.association_element_bounded(end_of_element_idx); + }, + Comma, + ); + self.end_node(); + } + + fn association_element_bounded(&mut self, max_index: usize) { + self.start_node(AssociationElement); + + // TODO: Error handling is done at a bare minimum. + if let Ok(_) = self.lookahead_max_token_index(max_index, [RightArrow]) { + self.formal_part(); + self.expect_token(RightArrow); + } + self.actual_part_bounded(max_index); + + self.end_node(); + } + + pub fn formal_part(&mut self) { + self.start_node(FormalPart); + self.name(); + // Note: `self.name()` will already consume any trailing parenthesized names! 
+ self.end_node(); + } + + fn actual_part_bounded(&mut self, max_index: usize) { + self.start_node(ActualPart); + // Parsing of `actual_part` would boil down to `name | expression | subtype_indication` + self.start_node(RawTokens); + self.skip_to(max_index); + self.end_node(); + self.end_node(); + } } #[cfg(test)] @@ -94,6 +142,55 @@ mod tests { use crate::parser::test_utils::check; use crate::parser::Parser; + #[test] + fn association_list() { + check( + Parser::association_list, + "arg1, arg2", + "\ +AssociationList + AssociationElement + ActualPart + RawTokens + Identifier 'arg1' + Comma + AssociationElement + ActualPart + RawTokens + Identifier 'arg2' +", + ); + + check( + Parser::association_list, + "p1 => 1, std_ulogic(p2)=> sl_sig", + "\ +AssociationList + AssociationElement + FormalPart + Name + Identifier 'p1' + RightArrow + ActualPart + RawTokens + AbstractLiteral + Comma + AssociationElement + FormalPart + Name + Identifier 'std_ulogic' + RawTokens + LeftPar + Identifier 'p2' + RightPar + RightArrow + ActualPart + RawTokens + Identifier 'sl_sig' +", + ); + } + #[test] fn empty_generic_clause() { check( diff --git a/vhdl_syntax/src/parser/productions/mod.rs b/vhdl_syntax/src/parser/productions/mod.rs index e9fc2a74..1c64d637 100644 --- a/vhdl_syntax/src/parser/productions/mod.rs +++ b/vhdl_syntax/src/parser/productions/mod.rs @@ -16,3 +16,4 @@ mod names; mod signature; mod statements; mod subtype; +mod scalar_types; diff --git a/vhdl_syntax/src/parser/productions/names.rs b/vhdl_syntax/src/parser/productions/names.rs index 99f583c1..8009054c 100644 --- a/vhdl_syntax/src/parser/productions/names.rs +++ b/vhdl_syntax/src/parser/productions/names.rs @@ -5,19 +5,56 @@ // Copyright (c) 2025, Lukas Scheller lukasscheller@icloud.com use crate::parser::Parser; -use crate::syntax::node_kind::NodeKind::{Label, Name}; +use crate::syntax::node_kind::NodeKind::*; +use crate::tokens::Keyword as Kw; use crate::tokens::TokenKind::*; use crate::tokens::TokenStream; -impl 
Parser { - pub fn designator(&mut self) { - self.expect_one_of_tokens([Identifier, StringLiteral, CharacterLiteral]); +fn is_start_of_attribute_name(parser: &mut Parser) -> bool { + // Checking for `LeftSquare || Tick` will result in ambiguities with other grammar rules where a signature is possible right after a name. + // Those rules can be `alias_declaration` (LRM §6.6.1) and `subprogram_instantiation_declaration` (LRM §4.4). + // By checking whether the closing square bracket is followed by a `Tick` this ambiguity is resolved + match parser.peek_token() { + Some(Tick) => true, + Some(LeftSquare) => { + let mut idx = 1; + let mut bracket_count = 1; + + while bracket_count > 0 { + match parser.peek_nth_token(idx) { + Some(LeftSquare) => bracket_count += 1, + Some(RightSquare) => bracket_count -= 1, + Some(_) => {} + None => { + return false; + } + } + + idx += 1; + } + + parser.next_nth_is(Tick, idx) + } + Some(_) | None => false, } +} +impl Parser { pub fn name(&mut self) { + // (Based on) LRM §8.1 + // The LRM grammar rules for names were transformed to avoid left recursion. + + // In contrast to the LRM, this parsing routine is greedy. Meaning, it will consume trailing parenthesized + // expressions even if they belong to an outer grammar rule! 
self.start_node(Name); - // TODO - self.designator(); + + if self.next_is(LtLt) { + self.external_name(); + } else { + self.expect_one_of_tokens([Identifier, StringLiteral, CharacterLiteral]); + } + + self.name_tail(); self.end_node(); } @@ -25,11 +62,449 @@ impl Parser { self.name() } - pub fn opt_label(&mut self) { + pub(crate) fn designator(&mut self) { + // TODO: That designator is not fully LRM compliant + self.expect_one_of_tokens([Identifier, StringLiteral, CharacterLiteral]); + } + + pub(crate) fn opt_label(&mut self) { if self.next_is(Identifier) && self.next_nth_is(Colon, 1) { self.start_node(Label); self.skip_n(2); self.end_node(); } } + pub(crate) fn name_list(&mut self) { + self.start_node(NameList); + self.separated_list(Parser::name, Comma); + self.end_node(); + } + + fn suffix(&mut self) { + // LRM §8.3 + // suffix ::= identifier | string_literal | character_literal | `all` ; + self.expect_one_of_tokens([ + Identifier, + StringLiteral, + CharacterLiteral, + Keyword(Kw::All), + ]); + } + + fn name_tail(&mut self) { + // name ::= prefix [ name_tail ] ; + // name_tail ::= selected_name | attribute_name | indexed_name | slice_name | function_name ; + // selected_name ::= `.` suffix [ name_tail ] ; + // attribute_name ::= [ signature ] `'` identifier [ `(` expression `)` ] [ name_tail ] ; + // function_name ::= `(` association_list `)` [ name_tail ] ; + // indexed_name ::= `(` expression { `,` expression } `)` [ name_tail ] ; + // slice_name ::= `(` discrete_range `)` [ name_tail ] ; + + if self.next_is(Dot) { + self.start_node(SelectedName); + self.expect_token(Dot); + self.suffix(); + self.end_node(); + self.name_tail(); + } else if self.next_is(LeftPar) { + // Instead of trying to differentiate between `subtype_indication`, `association_list`, a list of `expression`s and a `discrete_range` + // put all tokens inside the parenthesis in a `RawTokens` node. 
+ self.start_node(RawTokens); + self.expect_token(LeftPar); + match self.lookahead([RightPar]) { + Ok((_, end_index)) => { + self.skip_to(end_index); + } + Err(_) => { + // TODO: The parenthesized expression is not terminated correctly + // Find some way to handle this gracefully! + self.eof_err(); + self.end_node(); + return; + } + } + self.expect_token(RightPar); + self.end_node(); + + self.name_tail(); + } else if is_start_of_attribute_name(self) { + self.start_node(AttributeName); + if self.next_is(LeftSquare) { + self.signature(); + } + self.expect_token(Tick); + + // `range` is a keyword, but may appear as an `attribute_name` + if !self.opt_identifier() { + self.expect_kw(Kw::Range); + } + + if self.next_is(LeftPar) { + self.start_node(ParenthesizedExpression); + self.expect_token(LeftPar); + self.expression(); + self.expect_token(RightPar); + self.end_node(); + } + self.end_node(); + self.name_tail(); + } + } + + pub fn external_name(&mut self) { + // LRM §8.7 + self.start_node(ExternalName); + self.expect_token(LtLt); + + self.expect_one_of_tokens([ + Keyword(Kw::Constant), + Keyword(Kw::Signal), + Keyword(Kw::Variable), + ]); + self.external_pathname(); + self.expect_token(Colon); + self.subtype_indication(); + + self.expect_token(GtGt); + self.end_node(); + } + + fn external_pathname(&mut self) { + // LRM §8.7 + self.start_node(ExternalPathName); + match_next_token!(self, + CommAt => { + self.expect_token(CommAt); + self.identifier(); + self.expect_token(Dot); + self.identifier(); + self.expect_token(Dot); + self.identifier(); + while self.opt_token(Dot) { + self.identifier(); + } + }, + Dot => { + self.expect_token(Dot); + self.partial_pathname(); + }, + Circ, Identifier => { + while self.opt_token(Circ) { + self.expect_token(Dot); + } + self.partial_pathname(); + }); + self.end_node(); + } + + fn partial_pathname(&mut self) { + // LRM §8.7 + // partial_pathname ::= { identifier [ `(` expression `)` ] `.` } identifier ; + self.identifier(); + loop { + if 
self.next_is(LeftPar) { + self.start_node(ParenthesizedExpression); + self.expect_token(LeftPar); + self.expression(); + self.expect_token(RightPar); + self.end_node(); + self.expect_token(Dot); + } else if !self.opt_token(Dot) { + break; + } + self.identifier(); + } + } +} + +#[cfg(test)] +mod tests { + use crate::parser::{test_utils::check, Parser}; + + #[test] + fn parse_name() { + check( + Parser::name, + "lib1.fn('a', 1, sig).vector(100 downto 10).all", + "\ +Name + Identifier 'lib1' + SelectedName + Dot + Identifier 'fn' + RawTokens + LeftPar + CharacterLiteral ''a'' + Comma + AbstractLiteral + Comma + Identifier 'sig' + RightPar + SelectedName + Dot + Identifier 'vector' + RawTokens + LeftPar + AbstractLiteral + Keyword(Downto) + AbstractLiteral + RightPar + SelectedName + Dot + Keyword(All) +", + ); + } + + #[test] + fn parse_external_name() { + check( + Parser::name, + "<< constant @lib.pkg.obj : std_ulogic >>", + "\ +Name + ExternalName + LtLt + Keyword(Constant) + ExternalPathName + CommAt + Identifier 'lib' + Dot + Identifier 'pkg' + Dot + Identifier 'obj' + Colon + Identifier 'std_ulogic' + GtGt +", + ); + + check( + Parser::name, + "<< variable .tb.sig : bit >>", + "\ +Name + ExternalName + LtLt + Keyword(Variable) + ExternalPathName + Dot + Identifier 'tb' + Dot + Identifier 'sig' + Colon + Identifier 'bit' + GtGt +", + ); + + check( + Parser::name, + "<< signal uut.sig : natural >>", + "\ +Name + ExternalName + LtLt + Keyword(Signal) + ExternalPathName + Identifier 'uut' + Dot + Identifier 'sig' + Colon + Identifier 'natural' + GtGt +", + ); + + check( + Parser::name, + "<< signal ^.up1_signal : real >>", + "\ +Name + ExternalName + LtLt + Keyword(Signal) + ExternalPathName + Circ + Dot + Identifier 'up1_signal' + Colon + Identifier 'real' + GtGt +", + ); + + check( + Parser::name, + "<< constant ^.^.^.^.up4_signal : integer >>", + "\ +Name + ExternalName + LtLt + Keyword(Constant) + ExternalPathName + Circ + Dot + Circ + Dot + Circ + Dot + Circ + Dot + Identifier 'up4_signal' + Colon + 
Identifier 'integer' + GtGt +", + ); + + check( + Parser::name, + "<< constant .tb.uut.gen(1).sig : bit >>", + "\ +Name + ExternalName + LtLt + Keyword(Constant) + ExternalPathName + Dot + Identifier 'tb' + Dot + Identifier 'uut' + Dot + Identifier 'gen' + ParenthesizedExpression + LeftPar + Expression + SimpleExpression + AbstractLiteral + RightPar + Dot + Identifier 'sig' + Colon + Identifier 'bit' + GtGt +", + ); + } + + #[test] + fn parse_selected_name() { + check( + Parser::name, + "lib.pkg_outer.pkg_inner.obj", + "\ +Name + Identifier 'lib' + SelectedName + Dot + Identifier 'pkg_outer' + SelectedName + Dot + Identifier 'pkg_inner' + SelectedName + Dot + Identifier 'obj' +", + ); + + check( + Parser::name, + "pkg.all", + "\ +Name + Identifier 'pkg' + SelectedName + Dot + Keyword(All) +", + ); + } + + #[test] + fn parse_attribute_name() { + check( + Parser::name, + "obj'left", + "\ +Name + Identifier 'obj' + AttributeName + Tick + Identifier 'left' +", + ); + + check( + Parser::name, + "slv'range", + "\ +Name + Identifier 'slv' + AttributeName + Tick + Keyword(Range) +", + ); + + check( + Parser::name, + "slv'reverse_range", + "\ +Name + Identifier 'slv' + AttributeName + Tick + Identifier 'reverse_range' +", + ); + + check( + Parser::name, + "integer'image(obj)", + "\ +Name + Identifier 'integer' + AttributeName + Tick + Identifier 'image' + ParenthesizedExpression + LeftPar + Expression + SimpleExpression + Identifier 'obj' + RightPar +", + ); + + check( + Parser::name, + "ieee.numeric_std.to_unsigned[natural, natural return unsigned]'simple_name", + "\ +Name + Identifier 'ieee' + SelectedName + Dot + Identifier 'numeric_std' + SelectedName + Dot + Identifier 'to_unsigned' + AttributeName + Signature + LeftSquare + NameList + Name + Identifier 'natural' + Comma + Name + Identifier 'natural' + Keyword(Return) + Name + Identifier 'unsigned' + RightSquare + Tick + Identifier 'simple_name' +", + ); + } } diff --git 
a/vhdl_syntax/src/parser/productions/scalar_types.rs b/vhdl_syntax/src/parser/productions/scalar_types.rs new file mode 100644 index 00000000..99965f06 --- /dev/null +++ b/vhdl_syntax/src/parser/productions/scalar_types.rs @@ -0,0 +1,83 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at http://mozilla.org/MPL/2.0/. +// +// Copyright (c) 2024, Lukas Scheller lukasscheller@icloud.com +/// Parsing of scalar types +use crate::parser::Parser; +use crate::syntax::node_kind::NodeKind::*; +use crate::tokens::{Keyword as Kw, TokenKind::*, TokenStream}; + +impl Parser { + pub fn range(&mut self) { + self.range_bounded(usize::MAX); + } + fn range_bounded(&mut self, max_index: usize) { + // LRM §5.2.1 + + // `max_index` should point to the end of the range to parse (exclusive). + // This way the parser can use a bounded lookahead to distinguish between range expressions (using `to` or `downto`) and attribute names. 
+ self.start_node(Range); + + let is_range_expression = self + .lookahead_max_token_index(max_index, [Keyword(Kw::To), Keyword(Kw::Downto)]) + .is_ok(); + + if is_range_expression { + self.simple_expression(); + self.expect_one_of_tokens([Keyword(Kw::To), Keyword(Kw::Downto)]); + self.simple_expression(); + } else { + self.name(); + } + + self.end_node(); + } +} + +#[cfg(test)] +mod tests { + use crate::parser::{test_utils::check, Parser}; + + #[test] + fn parse_range() { + check( + Parser::range, + "100 downto 10", + "\ +Range + SimpleExpression + AbstractLiteral + Keyword(Downto) + SimpleExpression + AbstractLiteral +", + ); + + check( + Parser::range, + "0 to 0", + "\ +Range + SimpleExpression + AbstractLiteral + Keyword(To) + SimpleExpression + AbstractLiteral +", + ); + + check( + Parser::range, + "slv32_t'range", + "\ +Range + Name + Identifier 'slv32_t' + AttributeName + Tick + Keyword(Range) +", + ); + } +} diff --git a/vhdl_syntax/src/parser/productions/signature.rs b/vhdl_syntax/src/parser/productions/signature.rs index fcb09f7e..d3375571 100644 --- a/vhdl_syntax/src/parser/productions/signature.rs +++ b/vhdl_syntax/src/parser/productions/signature.rs @@ -5,10 +5,106 @@ // Copyright (c) 2025, Lukas Scheller lukasscheller@icloud.com use crate::parser::Parser; +use crate::syntax::node_kind::NodeKind::*; +use crate::tokens::token_kind::Keyword as Kw; +use crate::tokens::token_kind::TokenKind::*; use crate::tokens::TokenStream; impl Parser { - pub fn signature(&self) { - unimplemented!() + pub fn signature(&mut self) { + // LRM §4.5.3 + // signature ::= `[` [ name { `,` name } ] [ `return` name ] `]`; + self.start_node(Signature); + self.expect_token(LeftSquare); + + if !self.next_is_one_of([Keyword(Kw::Return), RightSquare]) { + self.name_list(); + } + + if self.opt_token(Keyword(Kw::Return)) { + self.name(); + } + + self.expect_token(RightSquare); + self.end_node(); + } +} + +#[cfg(test)] +mod tests { + use crate::parser::{test_utils::check, Parser}; + + 
#[test] + fn parse_signature() { + check( + Parser::signature, + "[natural, bit return unsigned]", + "\ +Signature + LeftSquare + NameList + Name + Identifier 'natural' + Comma + Name + Identifier 'bit' + Keyword(Return) + Name + Identifier 'unsigned' + RightSquare +", + ); + + check( + Parser::signature, + "[]", + "\ +Signature + LeftSquare + RightSquare +", + ); + + check( + Parser::signature, + "[return ret_t]", + "\ +Signature + LeftSquare + Keyword(Return) + Name + Identifier 'ret_t' + RightSquare +", + ); + + check( + Parser::signature, + "[arg1_t, arg2_t]", + "\ +Signature + LeftSquare + NameList + Name + Identifier 'arg1_t' + Comma + Name + Identifier 'arg2_t' + RightSquare +", + ); + + check( + Parser::signature, + "[arg1_t]", + "\ +Signature + LeftSquare + NameList + Name + Identifier 'arg1_t' + RightSquare +", + ); } }
diff --git a/vhdl_syntax/src/parser/util.rs b/vhdl_syntax/src/parser/util.rs
index 5c32f3f3..425e5cae 100644
--- a/vhdl_syntax/src/parser/util.rs
+++ b/vhdl_syntax/src/parser/util.rs
@@ -67,18 +67,28 @@ impl Parser {
     pub(crate) fn skip(&mut self) {
         if let Some(token) = self.tokenizer.next() {
             self.builder.push(token);
+            self.token_index += 1;
         }
     }
 
     pub(crate) fn skip_n(&mut self, n: usize) {
         for _ in 0..n {
-            self.skip()
+            self.skip();
+            if self.peek_token().is_none() {
+                break;
+            }
         }
     }
 
+    /// Skips up to (not including) the absolute `token_index`; a no-op when already at or past it.
+    pub(crate) fn skip_to(&mut self, token_index: usize) {
+        self.skip_n(token_index.saturating_sub(self.token_index));
+    }
+
     pub(crate) fn expect_token(&mut self, kind: TokenKind) {
         if let Some(token) = self.tokenizer.next_if(|token| token.kind() == kind) {
             self.builder.push(token);
+            self.token_index += 1;
             return;
         }
         // TODO: what are possible recovery strategies?
@@ -115,6 +125,13 @@ impl Parser {
         self.peek_token() == Some(kind)
     }
 
+    pub(crate) fn next_is_one_of<const N: usize>(&self, kinds: [TokenKind; N]) -> bool {
+        match self.peek_token() {
+            Some(tok) => kinds.contains(&tok),
+            None => false,
+        }
+    }
+
     pub(crate) fn next_nth_is(&self, kind: TokenKind, n: usize) -> bool {
         self.peek_nth_token(n) == Some(kind)
     }
@@ -126,6 +143,7 @@ impl Parser {
     pub(crate) fn opt_token(&mut self, kind: TokenKind) -> bool {
         if let Some(token) = self.tokenizer.next_if(|token| token.kind() == kind) {
             self.builder.push(token);
+            self.token_index += 1;
             true
         } else {
             false
@@ -142,6 +160,7 @@ impl Parser {
         {
             let kind = token.kind();
             self.builder.push(token);
+            self.token_index += 1;
             Some(kind)
         } else {
             None
@@ -174,4 +193,57 @@ impl Parser {
     pub(crate) fn end(self) -> (GreenNode, Vec) {
         (self.builder.end(), self.diagnostics)
     }
+
+    pub(crate) fn lookahead<const N: usize>(
+        &mut self,
+        kinds: [TokenKind; N],
+    ) -> Result<(TokenKind, usize), usize> {
+        self.lookahead_max_token_index(usize::MAX, kinds)
+    }
+
+    /// Looks ahead, up to and including the token at `maximum_index`, until one of `kinds` is found outside of nested parentheses.
+    /// In case of success, the matching `TokenKind` is returned, as well as the token index it was found at.
+    /// In case of an error (EOF or a nesting error) the index at which the lookahead ended is returned.
+    ///
+    /// TODO: For better error handling you probably will need a way to differentiate between EOF and nesting errors!
+    pub(crate) fn lookahead_max_token_index<const N: usize>(
+        &mut self,
+        maximum_index: usize,
+        kinds: [TokenKind; N],
+    ) -> Result<(TokenKind, usize), usize> {
+        let mut length = 0;
+        let mut paren_count = 0;
+
+        while self.token_index + length <= maximum_index && paren_count >= 0 {
+            match self.peek_nth_token(length) {
+                Some(TokenKind::LeftPar) => paren_count += 1,
+                Some(TokenKind::RightPar) => {
+                    // Allow the closing parenthesis to match as well
+                    if paren_count == 0 && kinds.contains(&TokenKind::RightPar) {
+                        return Ok((TokenKind::RightPar, self.token_index + length));
+                    }
+
+                    paren_count -= 1;
+
+                    // A closing parenthesis indicates that some form of
+                    // grouping ended that was not started during this lookahead.
+                    if paren_count < 0 {
+                        return Err(self.token_index + length);
+                    }
+                }
+
+                Some(tok) => {
+                    // To avoid matching tokens in some (potentially recursive) sub expression of some sort,
+                    // only check the current token if we are at the outermost grouping layer (`paren_count == 0`).
+                    if paren_count == 0 && kinds.contains(&tok) {
+                        return Ok((tok, self.token_index + length));
+                    }
+                }
+                None => return Err(self.token_index + length),
+            }
+            length += 1;
+        }
+
+        Err(self.token_index + length)
+    }
 }
diff --git a/vhdl_syntax/src/syntax/node_kind.rs b/vhdl_syntax/src/syntax/node_kind.rs
index daba6b29..2cd568c2 100644
--- a/vhdl_syntax/src/syntax/node_kind.rs
+++ b/vhdl_syntax/src/syntax/node_kind.rs
@@ -24,6 +24,9 @@ pub enum NodeKind {
     DesignUnit,
     DesignFile,
     ContextClause,
+    LibraryClause,
+    UseClause,
+    ContextReference,
     GenericClause,
     PortClause,
     InterfaceList,
@@ -34,5 +37,22 @@ pub enum NodeKind {
     EntityDesignator,
     Label,
     BlockStatement,
-    InterfaceObjectDeclaration, // ...
+ InterfaceObjectDeclaration, + ParenthesizedExpression, + Expression, + SimpleExpression, + ExpressionList, + Range, + SelectedName, + ExternalName, + ExternalPathName, + AttributeName, + FunctionCallOrIndexedName, + SliceName, + RawTokens, + NameList, + AssociationList, + AssociationElement, + FormalPart, + ActualPart, // ... }