Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate proper precede requirement based on layout follow restriction #9

Merged
merged 8 commits into from
Aug 23, 2024
16 changes: 14 additions & 2 deletions rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import util::Math;

import lang::oniguruma::RegExp;
import lang::rascal::grammar::Util;
import lang::rascal::grammar::analyze::Symbols;

@synopsis{
Converts a set/list of values (presumably: productions, symbols, or
Expand All @@ -25,10 +26,21 @@ list[RegExp] toRegExps(Grammar g, list[value] values)
= [toRegExp(g, v) | v <- values];

@synopsis{
Converts a production to a regular expression.
Converts a production to a regular expression, optionally with a
grammar-dependent `\precede` guard (default: `false`)
}

RegExp toRegExp(Grammar g, prod(_, symbols, attributes)) {
RegExp toRegExp(Grammar g, prod(def, symbols, attributes), bool guard = false) {
    // Guarded case: taken only when a guard is requested, `def` has rules in
    // `g`, and the precede set of `def` is exactly one conditional symbol.
    if (guard && delabel(def) in g.rules && {\conditional(_, conditions)} := precede(g, def)) {
        // Symbols that occur in a `\not-follow` condition of that symbol
        set[Symbol] restricted = {s | \not-follow(s) <- conditions};
        // The empty symbol, conditional on being at the begin of a line
        Symbol lineStart = \conditional(\empty(), {\begin-of-line()});

        // Require that the production is preceded by one of the alternatives
        Condition lookbehind = \precede(\alt(restricted + {lineStart}));
        Symbol wrapped = \conditional(\seq(symbols), {lookbehind});

        // The recursive call leaves `guard` at its default (`false`), so the
        // wrapped production is converted by the unguarded case below.
        return toRegExp(g, prod(def, [wrapped], attributes));
    }

    // Unguarded case: concatenate the regular expressions of the symbols
    RegExp concatenated = infix("", toRegExps(g, symbols)); // Empty separator for concatenation

    // A "category" tag turns the whole expression into a categorized group
    if (/\tag("category"(c)) := attributes) {
        return group(concatenated, category = c);
    }
    return concatenated;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
@synopsis{
Types and functions to analyze symbols
}

@description{
Note: Some functions in this module seemingly overlap with those in module
`lang::rascal::grammar::Lookahead` (i.e., computation of first/follow sets).
However, only symbols of the form `\char-class(_)` are considered terminals
in that module, which is too strict for the purpose of this project.
}

// TODO: The analysis of delimiters in module
// `lang::rascal::grammar::analyze::Delimiters` can probably be rewritten (less
// code) to use functions in this module.

module lang::rascal::grammar::analyze::Symbols

import Grammar;
import ParseTree;
import util::Maybe;

import lang::rascal::grammar::Util;

@synopsis{
    Direction in which the symbols of a production are traversed
}

data Direction
    = forward()  // Left to right: symbols are visited in their written order
    | backward() // Right to left: symbols are visited in reversed order
    ;

// Arranges list `l` for traversal in the given direction: unchanged for
// `forward()`, reversed for `backward()`.
private list[&T] reorder(list[&T] l, forward()) {
    return l;
}

private list[&T] reorder(list[&T] l, backward()) {
    return reverse(l);
}

@synopsis{
    Returns the *last* set of symbol `s` in grammar `g`, i.e., the first set
    obtained when the symbols of productions are traversed backward
}

set[Symbol] last(Grammar g, Symbol s) {
    // Table of first sets, computed over productions from right to left
    map[Symbol, Maybe[set[Symbol]]] table = firstBySymbol(g, isTerminal, backward());

    // Labels are dropped before the lookup; an unresolved entry (`nothing()`)
    // is reported as the empty set.
    return unmaybe(table[delabel(s)]);
}

@synopsis{
    Returns the *first* set of symbol `s` in grammar `g`, with the symbols of
    productions traversed forward
}

set[Symbol] first(Grammar g, Symbol s) {
    // Table of first sets, computed over productions from left to right
    map[Symbol, Maybe[set[Symbol]]] table = firstBySymbol(g, isTerminal, forward());

    // Labels are dropped before the lookup; an unresolved entry (`nothing()`)
    // is reported as the empty set.
    return unmaybe(table[delabel(s)]);
}

// Computes, for every symbol of grammar `g`, its first set in direction `dir`.
// An entry is `nothing()` as long as it has not been resolved; `predicate`
// decides which symbols count as terminals (a terminal's set is itself).
@memo
private map[Symbol, Maybe[set[Symbol]]] firstBySymbol(Grammar g, bool(Symbol) predicate, Direction dir) {
// Initially, every key is unresolved
map[Symbol, Maybe[set[Symbol]]] ret
= (delabel(s): nothing() | s <- g.rules) // Non-terminals
+ (delabel(s): nothing() | /prod(_, [*_, s, *_], _) := g, !isNonTerminalType(s)); // Terminals

// First set of a list of symbols, based on the current contents of `ret`.
// NOTE(review): the empty list yields `{}`, not `{\empty()}` -- confirm that
// this is intended for productions without symbols.
Maybe[set[Symbol]] firstOf([])
= just({});
// If the set of head `h` contains `\empty()`, then the whole set (including
// `\empty()`) is combined with the first set of tail `t`. Otherwise, the entry
// of `h` is returned as-is (possibly `nothing()`).
Maybe[set[Symbol]] firstOf([h, *t])
= \set: just({\empty(), *_}) := ret[delabel(h)]
? union(\set, firstOf(t))
: ret[delabel(h)];

// Repeat until `ret` no longer changes
solve (ret) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: solve returns the solved value, so this can become: return solve(ret)...

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I played a bit with this just now, but it seems solve doesn't always return a value (it doesn't in the cases of this PR). I submitted an issue basically to update the docs (it seems by design instead of a bug).

// Only unresolved entries are (re)computed
for (s <- ret, nothing() == ret[s]) {
if (predicate(s)) {
ret[s] = just({s});
// `lookup` presumably returns the productions of `s` in `g` -- TODO confirm.
// This branch requires at least one production.
} else if (list[Production] prods: [_, *_] := lookup(g, s)) {
// `union` yields `nothing()` when either argument is `nothing()`, so the
// entry stays unresolved until all its productions can be resolved.
ret[s] = (just({}) | union(it, firstOf(reorder(p.symbols, dir))) | p <- prods);
} else {
ret[s] = just({\empty()});
}
}
}

return ret;
}

@synopsis{
    Returns the *precede* set of symbol `s` in grammar `g`, i.e., the follow
    set obtained when the symbols of productions are traversed backward
}

set[Symbol] precede(Grammar g, Symbol s) {
    // Table of follow sets, computed over productions from right to left
    map[Symbol, Maybe[set[Symbol]]] table = followBySymbol(g, isTerminal, backward());

    // Labels are dropped before the lookup; an unresolved entry (`nothing()`)
    // is reported as the empty set.
    return unmaybe(table[delabel(s)]);
}

@synopsis{
    Returns the *follow* set of symbol `s` in grammar `g`, with the symbols of
    productions traversed forward
}

set[Symbol] follow(Grammar g, Symbol s) {
    // Table of follow sets, computed over productions from left to right
    map[Symbol, Maybe[set[Symbol]]] table = followBySymbol(g, isTerminal, forward());

    // Labels are dropped before the lookup; an unresolved entry (`nothing()`)
    // is reported as the empty set.
    return unmaybe(table[delabel(s)]);
}

// Computes, for every symbol with rules in grammar `g`, its follow set in
// direction `dir`. An entry is `nothing()` as long as it has not been
// resolved; `predicate` is passed on to `firstBySymbol`.
@memo
private map[Symbol, Maybe[set[Symbol]]] followBySymbol(Grammar g, bool(Symbol) predicate, Direction dir) {
// Initially, every key is unresolved
map[Symbol, Maybe[set[Symbol]]] ret = (delabel(s): nothing() | s <- g.rules); // Non-terminals

// Follow set contributed by the symbols that come after an occurrence in a
// production of `parent`. When no symbols remain, it is the current entry of
// `parent` itself.
Maybe[set[Symbol]] followOf(Symbol parent, [])
= ret[delabel(parent)];
// If the first set of head `h` contains `\empty()`, then the other elements of
// that set are combined with the contribution of tail `t`. Otherwise, the
// first set of `h` is returned as-is (possibly `nothing()`).
Maybe[set[Symbol]] followOf(Symbol parent, [h, *t])
= just({\empty(), *rest}) := firstBySymbol(g, predicate, dir)[delabel(h)]
? union(just(rest), followOf(parent, t))
: firstBySymbol(g, predicate, dir)[delabel(h)];

// Repeat until `ret` no longer changes
solve (ret) {
// Only unresolved entries are (re)computed
for (s <- ret, nothing() == ret[s]) {
// Start from the empty set, and add the contribution of each occurrence
// of `s` in any production of `g`
ret[s] = just({});
for (/prod(def, symbols, _) := g, [*_, t, *after] := reorder(symbols, dir), s == delabel(t)) {
// `union` yields `nothing()` when either argument is `nothing()`, which
// puts the entry back to unresolved.
ret[s] = union(ret[s], followOf(def, after));
}
}
}

return ret;
}

// Unwraps an optional set of symbols: the wrapped set for `just(...)`, and the
// empty set for `nothing()`.
private set[Symbol] unmaybe(just(set[Symbol] symbols)) {
    return symbols;
}

private set[Symbol] unmaybe(nothing()) {
    return {};
}

// Combines two optional sets of symbols. The result is the union of the
// wrapped sets when both are present...
private Maybe[set[Symbol]] union(just(set[Symbol] left), just(set[Symbol] right)) {
    set[Symbol] combined = left + right;
    return just(combined);
}

// ...and `nothing()` in every other case (at least one argument is absent).
private default Maybe[set[Symbol]] union(Maybe[set[Symbol]] _, Maybe[set[Symbol]] _) {
    return nothing();
}

@synopsis{
    Checks if symbol `s` is a terminal, i.e., any symbol for which
    `isNonTerminalType` does not hold
}

bool isTerminal(Symbol s) {
    return !isNonTerminalType(s);
}
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ TmRule toTmRule(ConversionUnit u, NameGenerator g)
// Converts production `p` to a TextMate rule named `name`. If `def` is not
// synthetic and `getOuterDelimiterPair` finds both a begin and an end
// delimiter for `p`, the rule is built from the regular expressions of those
// delimiters, with the rule for `p` itself nested inside. Otherwise, the rule
// is built from the regular expression of `p` only.
// NOTE(review): the two `:` lines below look like the removed and the added
// line of this diff hunk (only the latter passes `guard = true`) -- confirm.
private TmRule toTmRule(RscGrammar rsc, p: prod(def, _, _), str name)
= !isSynthetic(def) && <just(begin), just(end)> := getOuterDelimiterPair(rsc, p)
? toTmRule(toRegExp(rsc, begin), toRegExp(rsc, end), "<begin.string><end.string>", [toTmRule(toRegExp(rsc, p), name)])
: toTmRule(toRegExp(rsc, p), name);
: toTmRule(toRegExp(rsc, p, guard = true), name);

// Builds a match rule named `name` from regular expression `re`: the string of
// `re` is the pattern, and the categories of `re` are converted to captures.
private TmRule toTmRule(RegExp re, str name) {
    return match(re.string, captures = toCaptures(re.categories), name = name);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ syntax Expression
;

lexical Id = [a-z][a-z0-9]* !>> [a-z0-9];
lexical Natural = [0-9]+;
lexical Natural = [0-9]+ !>> [0-9];
lexical String = "\"" ![\"]* "\"";

layout Layout = WhitespaceAndComment* !>> [\ \t\n\r%];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@
# ^^^ variable.other
# ^ -variable.other
# ^ -constant.numeric
# ^^^ -constant.numeric

foo 123
# ^^^ variable.other
# ^ -variable.other
# ^ -constant.numeric
# ^^^ constant.numeric

natural: natural;
Expand Down
Loading