Skip to content

Commit

Permalink
Merge pull request #9 from SWAT-engineering/better-rascal-grammar
Browse files Browse the repository at this point in the history
Generate proper precede requirement based on layout follow restriction
  • Loading branch information
sungshik authored Aug 23, 2024
2 parents cd31e74 + 654cb88 commit 6df491d
Show file tree
Hide file tree
Showing 6 changed files with 164 additions and 16 deletions.
16 changes: 14 additions & 2 deletions rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import util::Math;

import lang::oniguruma::RegExp;
import lang::rascal::grammar::Util;
import lang::rascal::grammar::analyze::Symbols;

@synopsis{
Converts a set/list of values (presumably: productions, symbols, or
Expand All @@ -25,10 +26,21 @@ list[RegExp] toRegExps(Grammar g, list[value] values)
= [toRegExp(g, v) | v <- values];

@synopsis{
Converts a production to a regular expression.
Converts a production to a regular expression, optionally with a
grammar-dependent `\precede` guard (default: `false`)
}

RegExp toRegExp(Grammar g, prod(_, symbols, attributes)) {
RegExp toRegExp(Grammar g, prod(def, symbols, attributes), bool guard = false) {
    // A guard is only generated when (1) the caller asked for one, (2) `def`
    // is a key of `g.rules`, and (3) the precede set of `def` consists of
    // exactly one conditional symbol.
    if (guard && delabel(def) in g.rules && {\conditional(_, conditions)} := precede(g, def)) {
        // Symbols taken from the `\not-follow` conditions of that single
        // preceding symbol (presumably the layout follow restriction; confirm
        // against the grammars this is used with).
        set[Symbol] excluded = {s | \not-follow(s) <- conditions};

        // The begin of a line is always accepted as an alternative.
        Symbol lineStart = \conditional(\empty(), {\begin-of-line()});

        // Wrap the whole right-hand side in one `\precede`-conditional
        // sequence and convert that instead. The recursive call leaves `guard`
        // at its default (`false`), so it falls through to the plain
        // conversion below.
        Condition lookbehind = \precede(\alt(excluded + {lineStart}));
        Symbol guarded = \conditional(\seq(symbols), {lookbehind});
        return toRegExp(g, prod(def, [guarded], attributes));
    }

    // Plain conversion: concatenate the converted symbols (empty separator)
    RegExp re = infix("", toRegExps(g, symbols));

    // Productions tagged with a category become a group of that category
    if (/\tag("category"(c)) := attributes) {
        return group(re, category = c);
    }
    return re;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
@synopsis{
Types and functions to analyze symbols
}

@description{
Note: Some functions in this module seemingly overlap with those in module
`lang::rascal::grammar::Lookahead` (i.e., computation of first/follow sets).
However, only symbols of the form `\char-class(_)` are considered terminals
in that module, which is too strict for the purpose of this project.
}

// TODO: The analysis of delimiters in module
// `lang::rascal::grammar::analyze::Delimiters` can probably be rewritten (less
// code) to use functions in this module.

module lang::rascal::grammar::analyze::Symbols

import Grammar;
import ParseTree;
import util::Maybe;

import lang::rascal::grammar::Util;

@synopsis{
Representation of a traversal direction along a list of symbols
}

// Selects the order in which `reorder` hands the symbols of a production to
// the analyses in this module: `forward()` keeps the list as is, `backward()`
// reverses it.
data Direction // Traverse lists of symbols (in productions)...
= forward() // - ...from left to right;
| backward() // - ...from right to left.
;

// Puts list `l` in traversal order: unchanged when traversing forward...
private list[&T] reorder(list[&T] l, forward()) {
    return l;
}

// ...and reversed when traversing backward.
private list[&T] reorder(list[&T] l, backward()) {
    return reverse(l);
}

@synopsis{
    Computes the *last* set of symbol `s` in grammar `g`, i.e., its first set
    when every production is traversed from right to left. The result is empty
    when the analysis leaves `s` unresolved.
}

set[Symbol] last(Grammar g, Symbol s) {
    map[Symbol, Maybe[set[Symbol]]] table = firstBySymbol(g, isTerminal, backward());
    return unmaybe(table[delabel(s)]);
}

@synopsis{
    Computes the *first* set of symbol `s` in grammar `g`, traversing every
    production from left to right. The result is empty when the analysis
    leaves `s` unresolved.
}

set[Symbol] first(Grammar g, Symbol s) {
    map[Symbol, Maybe[set[Symbol]]] table = firstBySymbol(g, isTerminal, forward());
    return unmaybe(table[delabel(s)]);
}

// Computes, for every symbol of grammar `g`, its first set with respect to
// traversal direction `dir` (results are memoised via `@memo`).
//
//  - `predicate` decides which symbols count as terminals: a symbol for which
//    it holds gets the singleton set containing itself.
//  - An entry is `nothing()` as long as it is unresolved, and `just(set)` once
//    it has been computed. Because `union` yields `nothing()` when either
//    argument is `nothing()`, a symbol stays unresolved until everything it
//    depends on has been resolved.
//  - `\empty()` in a set marks that the symbol can be skipped over.
//
// Returns the map from (delabeled) symbols to their first sets.
@memo
private map[Symbol, Maybe[set[Symbol]]] firstBySymbol(Grammar g, bool(Symbol) predicate, Direction dir) {
// Start with every symbol unresolved: the keys of `g.rules`, plus every symbol
// occurring in some production that is not of a non-terminal type.
map[Symbol, Maybe[set[Symbol]]] ret
= (delabel(s): nothing() | s <- g.rules) // Non-terminals
+ (delabel(s): nothing() | /prod(_, [*_, s, *_], _) := g, !isNonTerminalType(s)); // Terminals
// First set of a list of symbols, based on the current contents of `ret`. The
// empty list contributes nothing.
Maybe[set[Symbol]] firstOf([])
= just({});
// If the entry of head `h` contains `\empty()`, continue with tail `t`;
// otherwise, the entry of `h` (possibly still `nothing()`) is the answer.
// NOTE(review): `\set` is bound to the whole entry of `h`, so `\empty()` stays
// in the union even when `t` contributes other symbols -- confirm that this is
// intended.
Maybe[set[Symbol]] firstOf([h, *t])
= \set: just({\empty(), *_}) := ret[delabel(h)]
? union(\set, firstOf(t))
: ret[delabel(h)];
// Repeat until `ret` no longer changes
solve (ret) {
// Only symbols that are still unresolved are (re)visited
for (s <- ret, nothing() == ret[s]) {
if (predicate(s)) {
// Terminal: its first set is itself
ret[s] = just({s});
// `lookup` is defined outside this module; presumably it returns the
// productions of `s` in `g` -- TODO confirm
} else if (list[Production] prods: [_, *_] := lookup(g, s)) {
// At least one production: combine the first sets of all (reordered)
// right-hand sides; the result is `nothing()` if any of them is
ret[s] = (just({}) | union(it, firstOf(reorder(p.symbols, dir))) | p <- prods);
} else {
// No productions found: only `\empty()`
ret[s] = just({\empty()});
}
}
}
return ret;
}
@synopsis{
    Computes the *precede* set of symbol `s` in grammar `g`, i.e., its follow
    set when every production is traversed from right to left. The result is
    empty when the analysis leaves `s` unresolved.
}
set[Symbol] precede(Grammar g, Symbol s) {
    map[Symbol, Maybe[set[Symbol]]] table = followBySymbol(g, isTerminal, backward());
    return unmaybe(table[delabel(s)]);
}
@synopsis{
    Computes the *follow* set of symbol `s` in grammar `g`, traversing every
    production from left to right. The result is empty when the analysis
    leaves `s` unresolved.
}
set[Symbol] follow(Grammar g, Symbol s) {
    map[Symbol, Maybe[set[Symbol]]] table = followBySymbol(g, isTerminal, forward());
    return unmaybe(table[delabel(s)]);
}
// Computes, for every key of `g.rules`, its follow set with respect to
// traversal direction `dir` (results are memoised via `@memo`). With
// `backward()`, productions are reversed first, so the symbols "after" an
// occurrence are the ones before it in the grammar as written.
//
//  - `predicate` is passed on to `firstBySymbol` to decide which symbols count
//    as terminals.
//  - An entry is `nothing()` as long as it is unresolved, and `just(set)` once
//    it has been computed (`union` yields `nothing()` when either argument is
//    `nothing()`).
//
// Returns the map from (delabeled) symbols to their follow sets.
@memo
private map[Symbol, Maybe[set[Symbol]]] followBySymbol(Grammar g, bool(Symbol) predicate, Direction dir) {
map[Symbol, Maybe[set[Symbol]]] ret = (delabel(s): nothing() | s <- g.rules); // Non-terminals
// What can follow once the end of a production of `parent` is reached: the
// current follow set of `parent` itself (possibly still `nothing()`).
Maybe[set[Symbol]] followOf(Symbol parent, [])
= ret[delabel(parent)];
// What can follow given remaining symbols `[h, *t]`: if the first set of `h`
// contains `\empty()`, take the rest of that set (without `\empty()`) and
// continue with `t`; otherwise, the first set of `h` is the answer.
Maybe[set[Symbol]] followOf(Symbol parent, [h, *t])
= just({\empty(), *rest}) := firstBySymbol(g, predicate, dir)[delabel(h)]
? union(just(rest), followOf(parent, t))
: firstBySymbol(g, predicate, dir)[delabel(h)];
// Repeat until `ret` no longer changes
solve (ret) {
// Only symbols that are still unresolved are (re)visited
for (s <- ret, nothing() == ret[s]) {
// Start from the empty set, then add a contribution for every occurrence
// `t` of `s` in any (reordered) production. If a contribution is still
// `nothing()`, the entry of `s` becomes `nothing()` again and is retried in
// the next iteration.
// NOTE(review): symbols whose follow sets depend on each other cyclically
// look like they never leave `nothing()` (callers then get the empty set
// via `unmaybe`) -- confirm.
ret[s] = just({});
for (/prod(def, symbols, _) := g, [*_, t, *after] := reorder(symbols, dir), s == delabel(t)) {
ret[s] = union(ret[s], followOf(def, after));
}
}
}
return ret;
}
// Unwraps an optional set of symbols; an absent set is treated as empty.
private set[Symbol] unmaybe(Maybe[set[Symbol]] maybe) {
    if (just(set[Symbol] elements) := maybe) {
        return elements;
    }
    return {};
}
// Unites two optional sets of symbols. The result is present only when both
// arguments are present; otherwise, it is `nothing()`.
private Maybe[set[Symbol]] union(Maybe[set[Symbol]] left, Maybe[set[Symbol]] right) {
    if (just(set[Symbol] l) := left, just(set[Symbol] r) := right) {
        return just(l + r);
    }
    return nothing();
}
@synopsis{
    Checks if symbol `s` is a terminal, i.e., any symbol that is not of a
    non-terminal type
}
bool isTerminal(Symbol s) {
    return !isNonTerminalType(s);
}
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ TmRule toTmRule(ConversionUnit u, NameGenerator g)
private TmRule toTmRule(RscGrammar rsc, p: prod(def, _, _), str name)
= !isSynthetic(def) && <just(begin), just(end)> := getOuterDelimiterPair(rsc, p)
? toTmRule(toRegExp(rsc, begin), toRegExp(rsc, end), "<begin.string><end.string>", [toTmRule(toRegExp(rsc, p), name)])
: toTmRule(toRegExp(rsc, p), name);
: toTmRule(toRegExp(rsc, p, guard = true), name);
// Builds a `match` rule named `name` from regular expression `re`: the string
// of `re` is the pattern, and its categories are converted to captures.
private TmRule toTmRule(RegExp re, str name) {
    return match(re.string, name = name, captures = toCaptures(re.categories));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ syntax Expression
;

lexical Id = [a-z][a-z0-9]* !>> [a-z0-9];
lexical Natural = [0-9]+;
lexical Natural = [0-9]+ !>> [0-9];
lexical String = "\"" ![\"]* "\"";

layout Layout = WhitespaceAndComment* !>> [\ \t\n\r%];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@
# ^^^ variable.other
# ^ -variable.other
# ^ -constant.numeric
# ^^^ -constant.numeric

foo 123
# ^^^ variable.other
# ^ -variable.other
# ^ -constant.numeric
# ^^^ constant.numeric

natural: natural;
Expand Down
Loading

0 comments on commit 6df491d

Please sign in to comment.