This repository has been archived by the owner on May 1, 2024. It is now read-only.
forked from GerHobbelt/ebnf-parser
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbnf.l
108 lines (89 loc) · 6.01 KB
/
bnf.l
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
// Definitions section of the BNF/EBNF grammar-file lexer: named regex macros,
// start conditions and lexer options. The rules follow the `%%` separator.
//
// NAME: starts with a letter or underscore; may contain dashes, but cannot end with one.
NAME [a-zA-Z_](?:[a-zA-Z0-9_-]*[a-zA-Z0-9_])?
// ID: starts with a letter or underscore, followed by letters, digits or underscores (no dashes).
ID [a-zA-Z_][a-zA-Z0-9_]*
// DECIMAL_NUMBER: decimal integer whose first digit is 1-9, so a lone `0` is not matched.
DECIMAL_NUMBER [1-9][0-9]*
// HEX_NUMBER: `0x` or `0X` followed by one or more hexadecimal digits.
HEX_NUMBER "0"[xX][0-9a-fA-F]+
// BR: one line break, written as CRLF, LF or CR.
BR \r\n|\n|\r
// Exclusive start conditions (lex `%x`): while one of these is active, only rules
// tagged with that condition (or with `<*>`) can match.
%x action code path options
// Inclusive start conditions (lex `%s`): rules without a condition tag stay active as well.
%s token
%s bnf ebnf
// NOTE(review): the exact effect of these two lexer options is defined by the lexer
// generator, not by this file -- confirm against the jison-lex documentation.
%options easy_keyword_rules
%options ranges
%%
// <token> state (pushed by the `%token` rule): a line break, `%%` or `;` ends the
// declaration. Each of these only pops the state; the matched text is consumed and
// no token is returned for it.
<token>\r|\n this.popState();
<token>"%%" this.popState();
<token>";" this.popState();
// `%%` while in the bnf or ebnf state: push the `code` state and return the `%%` token.
<bnf,ebnf>"%%" this.pushState('code'); return '%%';
// Grouping and repetition operators are only tokenized in the ebnf state;
// each is returned as a token named after the character itself.
<ebnf>"(" return '(';
<ebnf>")" return ')';
<ebnf>"*" return '*';
<ebnf>"?" return '?';
<ebnf>"+" return '+';
// <options> state (pushed by the `%options` rule): lexes `name` / `name=value` items
// up to the end of the line.
<options>{NAME} return 'NAME';
<options>"=" return '=';
// Double- or single-quoted value: the surrounding quotes are stripped from yytext.
<options>\"("\\\\"|'\"'|[^"])*\" yytext = yytext.substr(1, yytext.length - 2); return 'OPTION_VALUE';
<options>"'"("\\\\"|"\'"|[^'])*"'" yytext = yytext.substr(1, yytext.length - 2); return 'OPTION_VALUE';
// Any other run of non-whitespace characters is an unquoted value.
<options>[^\s\r\n]+ return 'OPTION_VALUE';
// One or more line breaks (optionally preceded by whitespace) end the option list:
// pop the state and report OPTIONS_END.
<options>{BR}+ this.popState(); return 'OPTIONS_END';
<options>\s+{BR}+ this.popState(); return 'OPTIONS_END';
// Remaining whitespace between items is skipped.
<options>\s+ /* empty */
// Rules without a start-condition tag: whitespace, `//` line comments and
// `/* ... */` block comments are consumed without returning a token.
\s+ /* skip whitespace */
"//".* /* skip comment */
"/*"(.|\n|\r)*?"*/" /* skip comment */
// `[id]`: returned as ALIAS with the square brackets stripped from yytext.
"["{ID}"]" yytext = yytext.substr(1, yyleng - 2); return 'ALIAS';
{ID} return 'ID';
// Non-empty quoted strings: the quotes are stripped from yytext. The patterns have
// no escape handling, so the content cannot contain the delimiting quote character.
'"'[^"]+'"' yytext = yytext.substr(1, yyleng - 2); return 'STRING';
"'"[^']+"'" yytext = yytext.substr(1, yyleng - 2); return 'STRING';
// Inside a `%token` declaration: a run of non-whitespace characters is a TOKEN_WORD.
<token>[^\s\r\n]+ return 'TOKEN_WORD';
// Single-character punctuation, returned as tokens named after the character.
":" return ':';
";" return ';';
"|" return '|';
// `%%` (a `<bnf,ebnf>` rule for `%%` is listed earlier in the file): push the `ebnf`
// state when the `ebnf` flag is truthy, otherwise `bnf`, and return the `%%` token.
// NOTE(review): `ebnf` is not declared anywhere in this file; presumably it is
// provided by the code that embeds this lexer -- confirm.
"%%" this.pushState(ebnf ? 'ebnf' : 'bnf'); return '%%';
// `%ebnf` and `%debug` only set flags on `yy.options` (creating the object when
// absent); neither returns a token. `%ebnf` also sets the `ebnf` flag read above.
"%ebnf" if (!yy.options) { yy.options = {}; } ebnf = yy.options.ebnf = true;
"%debug" if (!yy.options) { yy.options = {}; } yy.options.debug = true;
// Declaration keywords, each returned as its own token.
"%parser-type" return 'PARSER_TYPE';
"%prec" return 'PREC';
"%start" return 'START';
"%left" return 'LEFT';
"%right" return 'RIGHT';
"%nonassoc" return 'NONASSOC';
// `%token` pushes the `token` state before returning its keyword token.
"%token" this.pushState('token'); return 'TOKEN';
"%parse-param" return 'PARSE_PARAM';
// `%options` pushes the `options` state before returning its keyword token.
"%options" this.pushState('options'); return 'OPTIONS';
// A whole `%lex ... /lex` section is returned as one LEX_BLOCK token. The match is
// non-greedy and the closing `/lex` must follow a line break (whitespace allowed between).
"%lex"[\w\W]*?{BR}\s*"/lex"\b return 'LEX_BLOCK';
"%import" return 'IMPORT';
// `%include` is explicitly enabled in the exclusive `code` state too; it pushes the
// `path` state so that what follows is lexed as a file path.
<INITIAL,ebnf,bnf,code>"%include" this.pushState('path'); return 'INCLUDE';
// Any other `%name` declaration: the rest of the line is matched, a warning is
// logged, and the text is returned to the parser as UNKNOWN_DECL.
"%"{NAME}[^\r\n]* %{
/* ignore unrecognized decl */
console.warn('ignoring unsupported parser option: ', yytext, ' while lexing in ', this.topState(), ' state');
return 'UNKNOWN_DECL';
%}
// `<id>`: returned as TOKEN_TYPE with the angle brackets stripped from yytext.
"<"{ID}">" yytext = yytext.substr(1, yyleng - 2); return 'TOKEN_TYPE';
// `{{ ... }}` and `%{ ... %}` blocks: returned whole as ACTION with the two-character
// delimiters stripped from both ends.
"{{"[\w\W]*?"}}" yytext = yytext.substr(2, yyleng - 4); return 'ACTION';
"%{"(.|\r|\n)*?"%}" yytext = yytext.substr(2, yytext.length - 4); return 'ACTION';
// A single `{` opens a brace-delimited action: reset the nesting counter `yy.depth`
// and push the `action` state.
"{" yy.depth = 0; this.pushState('action'); return '{';
// `->` followed by the rest of the line: returned as ARROW_ACTION without the `->`.
"->".* yytext = yytext.substr(2, yyleng - 2); return 'ARROW_ACTION';
// Integer literals: yytext is replaced by the parsed numeric value.
{HEX_NUMBER} yytext = parseInt(yytext, 16); return 'INTEGER';
// The negative lookahead rejects a decimal that is immediately followed by `x`, `X`
// or a hexadecimal digit.
{DECIMAL_NUMBER}(?![xX0-9a-fA-F]) yytext = parseInt(yytext, 10); return 'INTEGER';
// Catch-all: any single character (other than a line break) that reaches this rule
// aborts lexing with an Error reporting the character and its location (yylloc).
. %{
throw new Error("unsupported input character: " + yytext + " @ " + JSON.stringify(yylloc)); /* b0rk on bad characters */
%}
// End of input yields the EOF token in every start condition (`<*>`).
<*><<EOF>> return 'EOF';
// <action> state (pushed by the `{` rule): the body of a brace-delimited action is
// returned piecewise as ACTION_BODY tokens. Comments, regex-like literals and quoted
// strings are matched as whole units, so braces inside them do not reach the
// `{` / `}` rules below.
<action>"/*"(.|\n|\r)*?"*/" return 'ACTION_BODY';
<action>"//".* return 'ACTION_BODY';
<action>"/"[^ /]*?['"{}'][^ ]*?"/" return 'ACTION_BODY'; // regexp with braces or quotes (and no spaces)
<action>\"("\\\\"|'\"'|[^"])*\" return 'ACTION_BODY';
<action>"'"("\\\\"|"\'"|[^'])*"'" return 'ACTION_BODY';
// A slash or quote character that did not start one of the units above, followed by
// one or more characters that are not braces, slashes or quotes.
<action>[/"'][^{}/"']+ return 'ACTION_BODY';
// A run of characters containing no braces, slashes or quotes.
<action>[^{}/"']+ return 'ACTION_BODY';
// Nested braces are counted in `yy.depth`. A `}` seen at depth 0 closes the action
// and pops the state; any other `}` just decrements the counter.
<action>"{" yy.depth++; return '{';
<action>"}" if (yy.depth === 0) { this.popState(); } else { yy.depth--; } return '}';
// <code> state (pushed when `%%` is seen in the bnf/ebnf state): the trailing code
// section is returned as CODE tokens, one line at a time.
// In the trailing CODE block, only accept `%include` macros when they appear at the start of a line,
// and make sure the rest of the lexer regexes account for this one so it'll match that way only:
// the first rule consumes the remainder of a line together with the line break(s) that follow it.
<code>[^\r\n]*(\r|\n)+ return 'CODE';
<code>[^\r\n]+ return 'CODE'; // the bit of CODE just before EOF...
// <path> state (pushed by the `%include` rule): lexes the file path that follows.
// A line break at this point ends the state; the break is pushed back with unput()
// so that it is lexed again in the state being returned to.
<path>[\r\n] this.popState(); this.unput(yytext);
// Quoted path: the quotes are stripped from yytext, the state is popped, and PATH is returned.
<path>"'"[^\r\n]+"'" yytext = yytext.substr(1, yyleng - 2); this.popState(); return 'PATH';
<path>'"'[^\r\n]+'"' yytext = yytext.substr(1, yyleng - 2); this.popState(); return 'PATH';
<path>\s+ // skip whitespace in the line
// Unquoted path: a run of non-whitespace characters.
<path>[^\s\r\n]+ this.popState(); return 'PATH';
%%