diff --git a/rascal-textmate-core/.editorconfig b/rascal-textmate-core/.editorconfig
new file mode 100644
index 0000000..6eeed87
--- /dev/null
+++ b/rascal-textmate-core/.editorconfig
@@ -0,0 +1,21 @@
+# Editor configuration, see https://editorconfig.org
+root = true
+
+[*]
+charset = utf-8
+indent_style = space
+indent_size = 2
+insert_final_newline = true
+trim_trailing_whitespace = true
+max_line_length = 80
+
+[*.sh]
+end_of_line = lf
+
+[*.java]
+indent_size = 4
+max_line_length = 120
+
+[*.rsc]
+indent_size = 4
+max_line_length = 120
diff --git a/rascal-textmate-core/.gitignore b/rascal-textmate-core/.gitignore
index 5feb907..8ec187b 100644
--- a/rascal-textmate-core/.gitignore
+++ b/rascal-textmate-core/.gitignore
@@ -1,2 +1,4 @@
target
-node_modules
\ No newline at end of file
+node_modules
+
+src/main/rascal/Scratch.rsc
\ No newline at end of file
diff --git a/rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc b/rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc
index 98e0fdd..900068b 100644
--- a/rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc
+++ b/rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc
@@ -60,7 +60,7 @@ RegExp toRegExp(Grammar g, list[Symbol] symbols, set[Attr] attributes) {
RegExp toRegExp(Grammar g, \label(_, symbol))
= toRegExp(g, symbol);
RegExp toRegExp(Grammar g, \parameter(_, _)) {
- throw "Presumably unreachable..."; } // Covered by `lookup` (which substitutes actuals for formals)
+ throw "Presumably unreachable..."; } // Covered by `prodsOf` (which substitutes actuals for formals)
// `ParseTree`: Start
RegExp toRegExp(Grammar g, \start(symbol))
@@ -68,7 +68,7 @@ RegExp toRegExp(Grammar g, \start(symbol))
// `ParseTree`: Non-terminals
RegExp toRegExp(Grammar g, Symbol s)
- = infix("|", [toRegExp(g, p) | p <- lookup(g, s)]) when isNonTerminalType(s);
+ = infix("|", [toRegExp(g, p) | p <- prodsOf(g, s)]) when isNonTerminalType(s);
// `ParseTree`: Terminals
RegExp toRegExp(Grammar _, \lit(string))
@@ -103,7 +103,7 @@ RegExp toRegExp(Grammar g, \conditional(symbol, conditions)) {
prefixConditions = [c | c <- conditions, isPrefixCondition(c)];
suffixConditions = [c | c <- conditions, isSuffixCondition(c)];
deleteConditions = [c | c <- conditions, isDeleteCondition(c)];
-
+
// Convert except conditions (depends on previous conversion)
if (_ <- exceptConditions) {
if (/\choice(symbol, alternatives) := g) {
@@ -112,7 +112,7 @@ RegExp toRegExp(Grammar g, \conditional(symbol, conditions)) {
= \label(l, _) := def
? \except(l) notin exceptConditions
: true;
-
+
re = infix("|", toRegExps(g, {a | a <- alternatives, keep(a)}));
}
}
@@ -130,7 +130,7 @@ RegExp toRegExp(Grammar g, \conditional(symbol, conditions)) {
// Convert delete conditions (depends on previous conversions)
if (_ <- deleteConditions) {
RegExp delete = infix("|", [toRegExp(g, s) | \delete(s) <- deleteConditions]);
-
+
// TODO: Explain this complicated conversion...
str string = "(?=(?\
)(?\.*)$)(?!(?:)\\k\$)\\k\";
list[str] categories = ["", *re.categories, "", *delete.categories];
@@ -196,7 +196,7 @@ str encode(int char) = preEncoded[char] ? "\\x{}";
private set[int] charRange(str from, str to) = {*[charAt(from, 0)..charAt(to, 0) + 1]};
private str toHex(int i)
- = i < 16
+ = i < 16
? hex[i]
: toHex(i / 16) + toHex(i % 16);
diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc
index 42476c2..eb40a3a 100644
--- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc
+++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc
@@ -37,18 +37,18 @@ bool tryParse(Grammar g, Symbol s, str input, bool allowAmbiguity = false) {
Checks if symbol `s` is recursive in grammar `g`
}
-bool isRecursive(Grammar g, Symbol s) {
- set[Symbol] getChildren(Symbol s)
- = {s | p <- lookup(g, s), /Symbol s := p.symbols};
+bool isRecursive(Grammar g, Symbol s, set[Symbol] checking = {})
+ = s in checking || any(p <- prodsOf(g, delabel(s)),
+ /Symbol child := p.symbols,
+ isRecursive(g, child, checking = checking + s));
- bool check(set[Symbol] checking, Symbol s)
- = s in checking
- ? true
- : any(child <- getChildren(s), check(checking + s, child));
-
- return check({}, s);
+@synopsis{
+ Checks if production `p` is recursive in grammar `g`
}
+bool isRecursive(Grammar g, Production p)
+ = any(/Symbol s := p.symbols, isRecursive(g, s));
+
@synopsis{
Representation of a pointer to a symbol in (the list of symbols of) a
production. This is useful to distinguish between different occurrences of
@@ -70,7 +70,7 @@ alias Pointer = tuple[Production p, int index];
```
lexical X = Y;
- lexical Y = alt1: "[" "[" "[" Z1 "]" "]" "]" | alt2: "<" Z2 ">";
+ lexical Y = alt1: "[" "[" "[" Z1 "]" "]" "]" | alt2: "<" Z2 ">";
lexical Z1 = "foo" "bar";
lexical Z2 = "baz";
```
@@ -80,7 +80,7 @@ alias Pointer = tuple[Production p, int index];
- ``
- ``
- ``
-
+
The list of pointers to `"qux"` is just empty.
}
@@ -92,7 +92,7 @@ list[Pointer] find(Grammar g, Production p, Symbol s, Direction dir = forward())
if (ith == needle) {
return [];
}
- for (isNonTerminalType(ith), child <- lookup(g, ith)) {
+ for (isNonTerminalType(ith), child <- prodsOf(g, ith)) {
if (list[Pointer] l: [_, *_] := doFind(doing + haystack, child, s)) {
return [] + l;
}
@@ -106,19 +106,26 @@ list[Pointer] find(Grammar g, Production p, Symbol s, Direction dir = forward())
}
@synopsis{
- Lookups a list of productions for symbol `s` in grammar `g`, replacing
+ Gets the set of productions that contain symbol `s` in grammar `g`
+}
+
+set[Production] prodsWith(Grammar g, Symbol s)
+ = {parent | /parent: prod(_, /Symbol _: s, _) := g};
+
+@synopsis{
+ Gets the list of productions of symbol `s` in grammar `g`, replacing
formal parameters with actual parameters when needed
}
-list[Production] lookup(Grammar g, s: \parameterized-sort(name, actual))
+list[Production] prodsOf(Grammar g, s: \parameterized-sort(name, actual))
= [subst(p, formal, actual) | /p: prod(\parameterized-sort(name, formal), _, _) := g.rules[s] ? []]
+ [subst(p, formal, actual) | /p: prod(label(_, \parameterized-sort(name, formal)), _, _) := g.rules[s] ? []];
-list[Production] lookup(Grammar g, s: \parameterized-lex(name, actual))
+list[Production] prodsOf(Grammar g, s: \parameterized-lex(name, actual))
= [subst(p, formal, actual) | /p: prod(\parameterized-lex(name, formal), _, _) := g.rules[s] ? []]
+ [subst(p, formal, actual) | /p: prod(label(_, \parameterized-lex(name, formal)), _, _) := g.rules[s] ? []];
-default list[Production] lookup(Grammar g, Symbol s)
+default list[Production] prodsOf(Grammar g, Symbol s)
= [p | /p: prod(s, _, _) := g.rules[s] ? []]
+ [p | /p: prod(label(_, s), _, _) := g.rules[s] ? []];
@@ -130,7 +137,7 @@ default list[Production] lookup(Grammar g, Symbol s)
&T subst(&T t, list[Symbol] from, list[Symbol] to)
= subst(t, toMapUnique(zip2(from, to)))
when size(from) == size(to);
-
+
private &T subst(&T t, map[Symbol, Symbol] m)
= visit (t) { case Symbol s => m[s] when s in m };
diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Categories.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Categories.rsc
new file mode 100644
index 0000000..9d42b7a
--- /dev/null
+++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Categories.rsc
@@ -0,0 +1,47 @@
+module lang::rascal::grammar::analyze::Categories
+
+import Grammar;
+import ParseTree;
+
+import lang::rascal::grammar::Util;
+
+@synopsis{
+ Special value to indicate that a production has no category
+}
+
+public str NO_CATEGORY = "";
+
+@synopsis{
+ Gets a set of categories such that, for each category, there exists a string
+ with that category produced by production `p`, as part of a string produced
+ by a start production of grammar `g`
+}
+
+set[str] getCategories(Grammar g, Production p)
+ = getCategoriesByProduction(g)[p];
+
+@memo
+private map[Production, set[str]] getCategoriesByProduction(Grammar g) {
+ map[Production, set[str]] ret = (p: {} | /p: prod(_, _, _) := g); // Initially, no production has any category
+
+ void doGet(Production p, set[str] parentCategories) {
+ set[str] categories = {c | /\tag("category"(str c)) := p}; // Category tags found (by deep match) in `p`
+
+ set[str] old = ret[p];
+ set[str] new = _ <- categories ? categories : old + parentCategories; // Own categories win over inherited ones
+ ret[p] = new;
+
+ // If the new categories of `p` are different from the old ones, then
+ // propagate these changes to the children of `p`
+ for (old != new, /Symbol s := p.symbols, child <- prodsOf(g, delabel(s))) {
+ doGet(child, new);
+ }
+ }
+
+ // Propagate categories from the roots of the grammar
+ for (root: prod(\start(_), _, _) <- ret) {
+ doGet(root, {NO_CATEGORY}); // Roots are seeded with the "no category" marker
+ }
+
+ return ret;
+}
\ No newline at end of file
diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc
index 1b15067..d7b40f9 100644
--- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc
+++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc
@@ -49,7 +49,7 @@ DelimiterPair getInnerDelimiterPair(Grammar g, Symbol s, bool getOnlyFirst = fal
```
lexical X = Y;
lexical Y = Y1 | Y2;
- lexical Y1 = "[" Z "]";
+ lexical Y1 = "[" Z "]";
lexical Y2 = "[" Z ")" [a-z];
lexical Z = [a-z];
```
@@ -83,7 +83,7 @@ private map[Symbol, Maybe[Symbol]] getInnerDelimiterBySymbol(Grammar g, Directio
@memo
private map[Production, Maybe[Symbol]] getInnerDelimiterByProduction(Grammar g, Direction direction, bool getOnlyFirst = false) {
map[Production, Maybe[Symbol]] ret = (p: nothing() | /p: prod(_, _, _) := g);
-
+
solve (ret) {
for (p <- ret, ret[p] == nothing()) {
for (s <- reorder(p.symbols, direction)) {
@@ -108,7 +108,7 @@ private map[Production, Maybe[Symbol]] getInnerDelimiterByProduction(Grammar g,
}
private set[Production] getChildren(Grammar g, Symbol s)
- = {*lookup(g, s)};
+ = {*prodsOf(g, s)};
@synopsis{
Gets the unique rightmost delimiter (`begin`) and the unique leftmost
@@ -122,7 +122,7 @@ private set[Production] getChildren(Grammar g, Symbol s)
```
lexical X = Y;
lexical Y = Y1 | Y2;
- lexical Y1 = "[" Z "]";
+ lexical Y1 = "[" Z "]";
lexical Y2 = "[" Z ")" [a-z];
lexical Z = [a-z];
```
@@ -166,7 +166,7 @@ private map[Symbol, Maybe[Symbol]] getOuterDelimiterBySymbol(Grammar g, Directio
ret[s] = unique(delimiters);
}
}
-
+
return ret;
}
diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Newlines.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Newlines.rsc
index b4e0d0b..5d3ffc8 100644
--- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Newlines.rsc
+++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Newlines.rsc
@@ -55,7 +55,7 @@ private map[Production, Maybe[set[Segment]]] getSegmentsByProduction(Grammar g)
}
private Maybe[set[Segment]] getSegmentsWithEnvironment(
- Grammar g, list[Symbol] symbols,
+ Grammar g, list[Symbol] symbols,
map[Production, Maybe[set[Segment]]] env) {
// General idea: Recursively traverse `symbols` from left to right, while
@@ -73,9 +73,9 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment(
set[Symbol] nested = {s | /Symbol s := head};
Maybe[set[Segment]] finished = get(running, [], final = tail == []);
-
+
// If the head contains a non-terminal, then: (1) finish the running
- // segment; (2) lookup the segments of the non-terminals in the
+ // segment; (2) look up the segments of the non-terminals in the
// environment, if any; (3) compute the segments of the tail. Return the
// union of 1-3.
if (any(s <- nested, isNonTerminalType(s))) {
@@ -85,7 +85,7 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment(
sets += finished;
// (2)
- sets += for (s <- nested, isNonTerminalType(s), p <- lookup(g, s)) {
+ sets += for (s <- nested, isNonTerminalType(s), p <- prodsOf(g, s)) {
bool isInitial(Segment seg)
= seg.initial && running.initial && running.symbols == [];
@@ -93,7 +93,7 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment(
= seg.final && tail == [];
Segment update(Segment seg)
= seg[initial = isInitial(seg)][final = isFinal(seg)];
-
+
append just(segs) := env[p] ? just({update(seg) | seg <- segs}) : nothing();
}
@@ -103,7 +103,7 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment(
// Return union
return (sets[0] | union(it, \set) | \set <- sets[1..]);
}
-
+
// If the head doesn't contain a non-terminal, but it has a newline,
// then: (1) finish the running segment; (2) compute the segments of the
// tail. Return the union of 1-2. Note: the head, as it has a newline,
@@ -111,13 +111,13 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment(
else if (any(s <- nested, hasNewline(g, s))) {
return union(finished, get(segment([]), tail));
}
-
+
// If the head doesn't contain a non-terminal, and if it doesn't have a
// newline, then add the head to the running segment and proceed with
// the tail.
else {
Segment old = running;
- Segment new = old[symbols = old.symbols + head];
+ Segment new = old[symbols = old.symbols + head];
return get(new, tail);
}
}
@@ -130,7 +130,7 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment(
}
bool hasNewline(Grammar g, Symbol s) {
- return any(p <- lookup(g, delabel(s)), hasNewline(g, p));
+ return any(p <- prodsOf(g, delabel(s)), hasNewline(g, p));
}
@synopsis{
@@ -149,7 +149,7 @@ private map[Production, bool] hasNewlineByProduction(Grammar g) {
for (p <- ret, !ret[p]) {
set[Symbol] nonTerminals = {s | /Symbol s := p.symbols, isNonTerminalType(s)};
ret[p] = ret[p] || any(/r: range(_, _) := p.symbols, hasNewline(r))
- || any(s <- nonTerminals, Production child <- lookup(g, s), ret[child]);
+ || any(s <- nonTerminals, Production child <- prodsOf(g, s), ret[child]);
}
}
@@ -165,7 +165,7 @@ private map[Production, bool] hasNewlineByProduction(Grammar g) {
bool hasNewline(str s)
= LF in chars(s);
-
+
bool hasNewline(range(begin, end))
= begin <= LF && LF <= end;
diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Symbols.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Symbols.rsc
index 164d401..b05aa39 100644
--- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Symbols.rsc
+++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Symbols.rsc
@@ -56,7 +56,7 @@ private map[Symbol, Maybe[set[Symbol]]] firstBySymbol(Grammar g, bool(Symbol) pr
for (s <- ret, nothing() == ret[s]) {
if (predicate(s)) {
ret[s] = just({s});
- } else if (list[Production] prods: [_, *_] := lookup(g, s)) {
+ } else if (list[Production] prods: [_, *_] := prodsOf(g, s)) {
ret[s] = (just({}) | union(it, firstOf(reorder(p.symbols, dir))) | p <- prods);
} else {
ret[s] = just({\empty()});
@@ -84,7 +84,7 @@ set[Symbol] follow(Grammar g, Symbol s)
@memo
private map[Symbol, Maybe[set[Symbol]]] followBySymbol(Grammar g, bool(Symbol) predicate, Direction dir) {
map[Symbol, Maybe[set[Symbol]]] ret = (delabel(s): nothing() | s <- g.rules); // Non-terminals
-
+
Maybe[set[Symbol]] followOf(Symbol parent, [])
= ret[delabel(parent)];
Maybe[set[Symbol]] followOf(Symbol parent, [h, *t])
@@ -142,6 +142,8 @@ private default Maybe[int] max(Maybe[int] _, Maybe[int] _) = nothing();
Computes the length of a terminal symbol as a range
}
+Range length(label(_, symbol)) = length(symbol);
+
Range length(\lit(string)) = ;
Range length(\cilit(string)) = ;
Range length(\char-class(_)) = <1, just(1)>;
diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc
index c8e3d9c..fdeb7c5 100644
--- a/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc
+++ b/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc
@@ -13,6 +13,7 @@ import util::Monitor;
import lang::oniguruma::Conversion;
import lang::oniguruma::RegExp;
import lang::rascal::grammar::Util;
+import lang::rascal::grammar::analyze::Categories;
import lang::rascal::grammar::analyze::Delimiters;
import lang::rascal::grammar::analyze::Dependencies;
import lang::rascal::grammar::analyze::Newlines;
@@ -130,7 +131,7 @@ private RscGrammar replaceLegacySemanticTokenTypes(RscGrammar rsc)
- one synthetic *delimiters* production;
- zero-or-more *user-defined* productions (from `rsc`);
- one synthetic *keywords* production.
-
+
Each production in the list (including the synthetic ones) is *suitable for
conversion* to a TextMate rule. A production is "suitable for conversion"
when it satisfies each of the following conditions:
@@ -138,7 +139,7 @@ private RscGrammar replaceLegacySemanticTokenTypes(RscGrammar rsc)
- it does not match newlines;
- it does not match the empty word;
- it has a `@category` tag.
-
+
See the walkthrough for further motivation and examples.
}
@@ -167,22 +168,36 @@ private RscGrammar replaceLegacySemanticTokenTypes(RscGrammar rsc)
list[ConversionUnit] analyze(RscGrammar rsc, str name) {
str jobLabel = "Analyzing)">";
- jobStart(jobLabel, work = 4);
-
- // Define auxiliary predicates
- bool isCyclic(Production p, set[Production] ancestors, _)
- = p in ancestors;
- bool isNonEmpty(prod(def, _, _), _, _)
- = !tryParse(rsc, delabel(def), "");
- bool hasCategory(prod(_, _, attributes), _, _)
- = /\tag("category"(_)) := attributes;
+ jobStart(jobLabel, work = 6);
- // Analyze dependencies among productions
+ // Analyze productions
jobStep(jobLabel, "Analyzing productions");
- Graph[Production] graph = toGraph(rsc);
- list[Production] prods = deps(graph).retainProds(isNonEmpty).retainProds(hasCategory).getProds();
- list[Production] prodsNonRecursive = prods & deps(graph).removeProds(isCyclic, true).getProds();
- list[Production] prodsRecursive = prods - prodsNonRecursive;
+ list[Production] prods = [p | /p: prod(_, _, _) <- rsc];
+
+ // Analyze categories
+ jobStep(jobLabel, "Analyzing categories");
+ prods = for (p <- prods) {
+
+ // If `p` has 0 categories (or only `NO_CATEGORY`) or >=2 categories, then
+ // ignore `p` (unclear which category should be used for highlighting)
+ set[str] categories = getCategories(rsc, p);
+ if ({_} !:= categories || {NO_CATEGORY} == categories) {
+ continue;
+ }
+
+ // If each parent of `p` has a category, then ignore `p` (the parents of
+ // `p` will be used for highlighting instead)
+ set[Production] parents = prodsWith(rsc, delabel(p.def));
+ if (!any(parent <- parents, NO_CATEGORY in getCategories(rsc, parent))) {
+ continue;
+ }
+
+ append p;
+ }
+
+ // Analyze emptiness
+ jobStep(jobLabel, "Analyzing emptiness");
+ prods = [p | p <- prods, !tryParse(rsc, delabel(p.def), "")];
// Analyze delimiters
jobStep(jobLabel, "Analyzing delimiters");
@@ -199,14 +214,11 @@ list[ConversionUnit] analyze(RscGrammar rsc, str name) {
// Prepare units
jobStep(jobLabel, "Preparing units");
-
- bool isRecursive(Production p)
- = p in prodsRecursive;
bool isEmptyProd(prod(_, [\alt(alternatives)], _))
= alternatives == {};
-
+
set[ConversionUnit] units = {};
- units += {unit(rsc, p, isRecursive(p), hasNewline(rsc, p), getOuterDelimiterPair(rsc, p), getInnerDelimiterPair(rsc, p, getOnlyFirst = true)) | p <- prods};
+ units += {unit(rsc, p, isRecursive(rsc, p), hasNewline(rsc, p), getOuterDelimiterPair(rsc, p), getInnerDelimiterPair(rsc, p, getOnlyFirst = true)) | p <- prods};
units += {unit(rsc, p, false, false, , ) | p <- prodsDelimiters + prodsKeywords, !isEmptyProd(p)};
list[ConversionUnit] ret = sort([*removeStrictPrefixes(units)]);
@@ -283,7 +295,7 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) {
bool guard = nothing() := u.innerDelimiters.begin;
TmRule r = toTmRule(toRegExp(u.rsc, u.prod, guard = guard))
[name = "/inner/single/"];
-
+
rules = insertIn(rules, (u: r));
}
@@ -299,8 +311,8 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) {
// Simple case: each unit does have an `end` inner delimiter
if (_ <- group && all(u <- group, just(_) := u.innerDelimiters.end)) {
-
- // Create a set of pointers to the first (resp. last) occurrence
+
+ // Create a set of pointers to the first (resp. last) occurrence
// of `pivot` in each unit, when `pivot` is a `begin` delimiter
// (resp. an `end` delimiter) of the group. If `pivot` occurs
// elsewhere in the grammar as well, then skip the conversion
@@ -308,9 +320,9 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) {
// avoid tokenization mistakes in which the other occurrences of
// `pivot` in the input are mistakenly interpreted as the
// beginning or ending of a unit in the group.
-
+
Symbol pivot = key.val;
-
+
set[Pointer] pointers = {};
pointers += pivot in begins ? {*find(rsc, u.prod, pivot, dir = forward()) [-1..] | u <- group} : {};
pointers += pivot in ends ? {*find(rsc, u.prod, pivot, dir = backward())[-1..] | u <- group} : {};
@@ -330,7 +342,7 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) {
toRegExp(rsc, [\alt(ends)], {t}),
[toTmRule(toRegExp(rsc, [s], {t})) | s <- toTerminals(segs)])
[name = "/inner/multi/"];
-
+
rules = insertIn(rules, (u: r | u <- group));
}
@@ -358,7 +370,7 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) {
// and an `end` delimiter, then generate a
// begin/end pattern to highlight these delimiters
// and all content in between.
-
+
set[Segment] segs = getSegments(rsc, suffix);
segs = {removeBeginEnd(seg, {begin}, {end}) | seg <- segs};
@@ -367,7 +379,7 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) {
toRegExp(rsc, [end], {t}),
[toTmRule(toRegExp(rsc, [s], {t})) | s <- toTerminals(segs)]);
}
-
+
else {
// If the suffix has a `begin` delimiter, but not
// an `end` delimiter, then generate a match pattern
@@ -463,7 +475,7 @@ private list[ConversionUnit] addOuterRules(list[ConversionUnit] units) {
toRegExp(rsc, [\alt(ends)], {}),
[include("#") | TmRule r <- innerRules])
[name = "/outer/"];
-
+
rules = insertIn(rules, (u: r | u <- group));
}
}
diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Emoji.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Emoji.rsc
index cb8fc28..176cb97 100644
--- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Emoji.rsc
+++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Emoji.rsc
@@ -8,6 +8,11 @@ import lang::textmate::Conversion;
import lang::textmate::ConversionTests;
import lang::textmate::ConversionUnit;
+start syntax Start
+ = Unit
+ | Boolean
+ ;
+
lexical Unit
= @category="constant.language" [🌊];
@@ -16,7 +21,7 @@ lexical Boolean
| @category="constant.language" [🙁]
;
-Grammar rsc = preprocess(grammar(#Boolean));
+Grammar rsc = preprocess(grammar(#Start));
list[ConversionUnit] units = [
unit(rsc, prod(lex("Boolean"),[lit("🙂")],{\tag("category"("constant.language"))}), false, false, , ),
diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/NestedCategories.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/NestedCategories.rsc
new file mode 100644
index 0000000..d1229eb
--- /dev/null
+++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/NestedCategories.rsc
@@ -0,0 +1,99 @@
+module lang::textmate::conversiontests::NestedCategories
+
+import Grammar;
+import ParseTree;
+import util::Maybe;
+
+import lang::textmate::Conversion;
+import lang::textmate::ConversionConstants;
+import lang::textmate::ConversionTests;
+import lang::textmate::ConversionUnit;
+
+start syntax Start
+ = A01 | A02 | A03 | A04 | A05 | A06 | A07 | A08 | A09 | A10 | A11 | A12;
+
+lexical A01 = @category="a" B01 [\ ] C01;
+lexical B01 = @category="b" D01 [\ ] "bar01";
+lexical C01 = @category="c" D01 [\ ] "baz01";
+lexical D01 = @category="d" "foo01";
+
+lexical A02 = @category="a" B02 [\ ] C02;
+lexical B02 = @category="b" D02 [\ ] "bar02";
+lexical C02 = @category="c" D02 [\ ] "baz02";
+lexical D02 = "foo02";
+
+lexical A03 = @category="a" B03 [\ ] C03;
+lexical B03 = @category="b" D03 [\ ] "bar03";
+lexical C03 = D03 [\ ] "baz03";
+lexical D03 = @category="d" "foo03";
+
+lexical A04 = @category="a" B04 [\ ] C04;
+lexical B04 = @category="b" D04 [\ ] "bar04";
+lexical C04 = D04 [\ ] "baz04";
+lexical D04 = "foo04";
+
+lexical A05 = @category="a" B05 [\ ] C05;
+lexical B05 = D05 [\ ] "bar05";
+lexical C05 = D05 [\ ] "baz05";
+lexical D05 = @category="d" "foo05";
+
+lexical A06 = @category="a" B06 [\ ] C06;
+lexical B06 = D06 [\ ] "bar06";
+lexical C06 = D06 [\ ] "baz06";
+lexical D06 = "foo06";
+
+lexical A07 = B07 [\ ] C07;
+lexical B07 = @category="b" D07 [\ ] "bar07";
+lexical C07 = @category="c" D07 [\ ] "baz07";
+lexical D07 = @category="d" "foo07";
+
+lexical A08 = B08 [\ ] C08;
+lexical B08 = @category="b" D08 [\ ] "bar08";
+lexical C08 = @category="c" D08 [\ ] "baz08";
+lexical D08 = "foo08";
+
+lexical A09 = B09 [\ ] C09;
+lexical B09 = @category="b" D09 [\ ] "bar09";
+lexical C09 = D09 [\ ] "baz09";
+lexical D09 = @category="d" "foo09"; // Design decision: D09 should be converted
+ // to a TextMate rule, because it's
+ // reachable via C09, which doesn't have a
+ // category
+
+lexical A10 = B10 [\ ] C10;
+lexical B10 = @category="b" D10 [\ ] "bar10";
+lexical C10 = D10 [\ ] "baz10";
+lexical D10 = "foo10";
+
+lexical A11 = B11 [\ ] C11;
+lexical B11 = D11 [\ ] "bar11";
+lexical C11 = D11 [\ ] "baz11";
+lexical D11 = @category="d" "foo11";
+
+lexical A12 = B12 [\ ] C12;
+lexical B12 = D12 [\ ] "bar12";
+lexical C12 = D12 [\ ] "baz12";
+lexical D12 = "foo12";
+
+Grammar rsc = preprocess(grammar(#Start));
+
+list[ConversionUnit] units = [
+ unit(rsc, prod(lex("C07"),[lex("D07"),lit(" "),lit("baz07")],{\tag("category"("c"))}), false, false, , ),
+ unit(rsc, prod(lex("C08"),[lex("D08"),lit(" "),lit("baz08")],{\tag("category"("c"))}), false, false, , ),
+ unit(rsc, prod(lex("A01"),[lex("B01"),lit(" "),lex("C01")],{\tag("category"("a"))}), false, false, , ),
+ unit(rsc, prod(lex("A02"),[lex("B02"),lit(" "),lex("C02")],{\tag("category"("a"))}), false, false, , ),
+ unit(rsc, prod(lex("A03"),[lex("B03"),lit(" "),lex("C03")],{\tag("category"("a"))}), false, false, , ),
+ unit(rsc, prod(lex("A04"),[lex("B04"),lit(" "),lex("C04")],{\tag("category"("a"))}), false, false, , ),
+ unit(rsc, prod(lex("A05"),[lex("B05"),lit(" "),lex("C05")],{\tag("category"("a"))}), false, false, , ),
+ unit(rsc, prod(lex("A06"),[lex("B06"),lit(" "),lex("C06")],{\tag("category"("a"))}), false, false, , ),
+ unit(rsc, prod(lex("B07"),[lex("D07"),lit(" "),lit("bar07")],{\tag("category"("b"))}), false, false, , ),
+ unit(rsc, prod(lex("B08"),[lex("D08"),lit(" "),lit("bar08")],{\tag("category"("b"))}), false, false, , ),
+ unit(rsc, prod(lex("B09"),[lex("D09"),lit(" "),lit("bar09")],{\tag("category"("b"))}), false, false, , ),
+ unit(rsc, prod(lex("B10"),[lex("D10"),lit(" "),lit("bar10")],{\tag("category"("b"))}), false, false, , ),
+ unit(rsc, prod(lex("D09"),[lit("foo09")],{\tag("category"("d"))}), false, false, , ),
+ unit(rsc, prod(lex("D11"),[lit("foo11")],{\tag("category"("d"))}), false, false, , ),
+ unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("foo07"),lit("foo06"),lit("foo09"),lit("foo08"),lit("foo03"),lit("foo02"),lit("foo05"),lit("foo04"),lit("foo10"),lit("baz09"),lit("foo11"),lit("baz06"),lit("baz05"),lit("baz08"),lit("baz07"),lit("baz02"),lit("baz04"),lit("baz03"),lit("bar06"),lit("bar05"),lit("bar02"),lit("bar04"),lit("bar03"),lit("bar11"),lit("bar10"),lit("foo12"),lit("foo01"),lit("baz12"),lit("baz01"),lit("bar09"),lit("baz11"),lit("bar08"),lit("baz10"),lit("bar07"),lit("bar12"),lit("bar01")})],{\tag("category"("keyword.control"))}), false, false, , )
+];
+
+test bool analyzeTest() = doAnalyzeTest(rsc, units, name = "NestedCategories");
+test bool transformTest() = doTransformTest(units, <15, 0, 0>, name = "NestedCategories");
diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/NestedCategories.test b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/NestedCategories.test
new file mode 100644
index 0000000..500b498
--- /dev/null
+++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/NestedCategories.test
@@ -0,0 +1,211 @@
+# SYNTAX TEST "NestedCategories"
+
+ foo01 bar01 foo01 baz01
+# ^ a
+# ^^^^^^ b
+# ^^^^^^ c
+# ^^^^^ ^^^^^ d
+
+ foo01 bar01
+# ^^^^^^^^^^^ -b
+# ^^^^^ -d
+
+ foo01 baz01
+# ^^^^^^^^^^^ -c
+# ^^^^^ -d
+
+ foo01
+# ^^^^^ -d
+
+ foo02 bar02 foo02 baz02
+# ^ a
+# ^^^^^^^^^^^ b
+# ^^^^^^^^^^^ c
+# ^^^^^ ^^^^^ -d
+
+ foo02 bar02
+# ^^^^^^^^^^^ -b
+# ^^^^^ -d
+
+ foo02 baz02
+# ^^^^^^^^^^^ -c
+# ^^^^^ -d
+
+ foo02
+# ^^^^^ -d
+
+ foo03 bar03 foo03 baz03
+# ^ ^^^^^^ a
+# ^^^^^^ b
+# ^^^^^^^^^^^ -c
+# ^^^^^ ^^^^^ d
+
+ foo03 bar03
+# ^^^^^^^^^^^ -b
+# ^^^^^ -d
+
+ foo03 baz03
+# ^^^^^^^^^^^ -c
+# ^^^^^ -d
+
+ foo03
+# ^^^^^ -d
+
+ foo04 bar04 foo04 baz04
+# ^^^^^^^^^^^^ a
+# ^^^^^^^^^^^ b
+# ^^^^^^^^^^^ -c
+# ^^^^^ ^^^^^ -d
+
+ foo04 bar04
+# ^^^^^^^^^^^ -b
+# ^^^^^ -d
+
+ foo04 baz04
+# ^^^^^^^^^^^ -c
+# ^^^^^ -d
+
+ foo04
+# ^^^^^ -d
+
+ foo05 bar05 foo05 baz05
+# ^^^^^^^ ^^^^^^ a
+# ^^^^^^^^^^^ -b
+# ^^^^^^^^^^^ -c
+# ^^^^^ ^^^^^ d
+
+ foo05 bar05
+# ^^^^^^^^^^^ -b
+# ^^^^^ -d
+
+ foo05 baz05
+# ^^^^^^^^^^^ -c
+# ^^^^^ -d
+
+ foo05
+# ^^^^^ -d
+
+ foo06 bar06 foo06 baz06
+# ^^^^^^^^^^^^^^^^^^^^^^^ a
+# ^^^^^^^^^^^ -b
+# ^^^^^^^^^^^ -c
+# ^^^^^ ^^^^^ -d
+
+ foo06 bar06
+# ^^^^^^^^^^^ -b
+# ^^^^^ -d
+
+ foo06 baz06
+# ^^^^^^^^^^^ -c
+# ^^^^^ -d
+
+ foo06
+# ^^^^^ -d
+
+ foo07 bar07 foo07 baz07
+# ^^^^^^^^^^^^^^^^^^^^^^^ -a
+# ^^^^^^ b
+# ^^^^^^ c
+# ^^^^^ ^^^^^ d
+
+ foo07 bar07
+# ^^^^^^ b
+# ^^^^^ d
+
+ foo07 baz07
+# ^^^^^^ c
+# ^^^^^ d
+
+ foo07
+# ^^^^^ -d
+
+ foo08 bar08 foo08 baz08
+# ^^^^^^^^^^^^^^^^^^^^^^^ -a
+# ^^^^^^^^^^^ b
+# ^^^^^^^^^^^ c
+# ^^^^^ ^^^^^ -d
+
+ foo08 bar08
+# ^^^^^^^^^^^ b
+# ^^^^^ -d
+
+ foo08 baz08
+# ^^^^^^^^^^^ c
+# ^^^^^ -d
+
+ foo08
+# ^^^^^ -d
+
+ foo09 bar09 foo09 baz09
+# ^^^^^^^^^^^^^^^^^^^^^^^ -a
+# ^^^^^^ b
+# ^^^^^^^^^^^ -c
+# ^^^^^ ^^^^^ d
+
+ foo09 bar09
+# ^^^^^^ b
+# ^^^^^ d
+
+ foo09 baz09
+# ^^^^^^^^^^^ -c
+# ^^^^^ d
+
+ foo09
+# ^^^^^ d
+
+ foo10 bar10 foo10 baz10
+# ^^^^^^^^^^^^^^^^^^^^^^^ -a
+# ^^^^^^^^^^^ b
+# ^^^^^^^^^^^ -c
+# ^^^^^ ^^^^^ -d
+
+ foo10 bar10
+# ^^^^^^^^^^^ b
+# ^^^^^ -d
+
+ foo10 baz10
+# ^^^^^^^^^^^ -c
+# ^^^^^ -d
+
+ foo10
+# ^^^^^ -d
+
+ foo11 bar11 foo11 baz11
+# ^^^^^^^^^^^^^^^^^^^^^^^ -a
+# ^^^^^^^^^^^ -b
+# ^^^^^^^^^^^ -c
+# ^^^^^ ^^^^^ d
+
+ foo11 bar11
+# ^^^^^^^^^^^ -b
+# ^^^^^ d
+
+ foo11 baz11
+# ^^^^^^^^^^^ -c
+# ^^^^^ d
+
+ foo11
+# ^^^^^ d
+
+ foo12 bar12 foo12 baz12
+# ^^^^^^^^^^^^^^^^^^^^^^^ -a
+# ^^^^^^^^^^^ -b
+# ^^^^^^^^^^^ -c
+# ^^^^^ ^^^^^ -d
+
+ foo11 bar11 foo11 baz11
+# ^^^^^^^^^^^^^^^^^^^^^^^ -a
+# ^^^^^^^^^^^ -b
+# ^^^^^^^^^^^ -c
+# ^^^^^ ^^^^^ d
+
+ foo12 bar12
+# ^^^^^^^^^^^ -b
+# ^^^^^ -d
+
+ foo12 baz12
+# ^^^^^^^^^^^ -c
+# ^^^^^ -d
+
+ foo12
+# ^^^^^ -d
\ No newline at end of file
diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalClass.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalClass.rsc
index 438e3d5..51627d4 100644
--- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalClass.rsc
+++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalClass.rsc
@@ -11,6 +11,8 @@ import lang::textmate::ConversionUnit;
// Based on `lang::rascal::\syntax::Rascal`
+start syntax Start = Class;
+
syntax Class
= simpleCharclass: "[" Range* ranges "]"
| complement: "!" Class charClass
@@ -37,7 +39,7 @@ lexical UnicodeEscape
| ascii: "\\" [a] [0-7] [0-9A-Fa-f]
;
-Grammar rsc = preprocess(grammar(#Class));
+Grammar rsc = preprocess(grammar(#Start));
list[ConversionUnit] units = [
unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("-"),lit(")"),lit("("),lit("!"),lit("||"),lit("&&")})],{}), false, false, , ),
diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalComment.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalComment.rsc
index 0b564ce..ff2f1d1 100644
--- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalComment.rsc
+++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalComment.rsc
@@ -10,12 +10,14 @@ import lang::textmate::ConversionUnit;
// Based on `lang::rascal::\syntax::Rascal`
+start syntax Start = Comment;
+
lexical Comment
= @category="Comment" "/*" (![*] | [*] !>> [/])* "*/"
| @category="Comment" "//" ![\n]* !>> [\ \t\r \u00A0 \u1680 \u2000-\u200A \u202F \u205F \u3000] $
;
-Grammar rsc = preprocess(grammar(#Comment));
+Grammar rsc = preprocess(grammar(#Start));
list[ConversionUnit] units = [
unit(rsc, prod(lex("Comment"),[lit("//"),conditional(\iter-star(\char-class([range(1,9),range(11,1114111)])),{\not-follow(\char-class([range(9,9),range(13,13),range(32,32),range(160,160),range(5760,5760),range(8192,8202),range(8239,8239),range(8287,8287),range(12288,12288)])),\end-of-line()})],{\tag("category"("comment"))}), false, false, , ),
diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalConcrete.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalConcrete.rsc
index ee6df07..cbf925f 100644
--- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalConcrete.rsc
+++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalConcrete.rsc
@@ -11,6 +11,8 @@ import lang::textmate::ConversionUnit;
// Based on `lang::rascal::\syntax::Rascal`
+start syntax Start = Concrete;
+
lexical Concrete
= typed: /* "(" LAYOUTLIST l1 Sym symbol LAYOUTLIST l2 ")" LAYOUTLIST l3 */ "`" ConcretePart* parts "`";
@@ -27,7 +29,7 @@ lexical ConcretePart
syntax ConcreteHole
= \one: "\<" /* Sym symbol Name name */ "\>";
-Grammar rsc = preprocess(grammar(#Concrete));
+Grammar rsc = preprocess(grammar(#Start));
list[ConversionUnit] units = [
unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("\n"),lit("\'")})],{}), false, false, , ),
diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringConstant.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringConstant.rsc
index 3f94eca..2ccb630 100644
--- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringConstant.rsc
+++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringConstant.rsc
@@ -11,6 +11,8 @@ import lang::textmate::ConversionUnit;
// Based on `lang::rascal::\syntax::Rascal`
+start syntax Start = StringConstant;
+
lexical StringConstant
= @category="Constant" "\"" StringCharacter* chars "\"" ;
@@ -27,7 +29,7 @@ lexical UnicodeEscape
| ascii: "\\" [a] [0-7] [0-9A-Fa-f]
;
-Grammar rsc = preprocess(grammar(#StringConstant));
+Grammar rsc = preprocess(grammar(#Start));
list[ConversionUnit] units = [
unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("\n"),lit("\'"),lit("\\")})],{}), false, false, , ),
diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringLiteral.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringLiteral.rsc
index ef27235..7e719bf 100644
--- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringLiteral.rsc
+++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringLiteral.rsc
@@ -11,6 +11,8 @@ import lang::textmate::ConversionUnit;
// Based on `lang::rascal::\syntax::Rascal`
+start syntax Start = StringLiteral;
+
syntax StringLiteral
= template: PreStringChars pre StringTemplate template StringTail tail
| interpolated: PreStringChars pre Expression expression StringTail tail
@@ -66,7 +68,7 @@ syntax Expression
| lessThan : Expression lhs "\<" !>> "-" Expression rhs
| greaterThan : Expression lhs "\>" Expression rhs );
-Grammar rsc = preprocess(grammar(#StringLiteral));
+Grammar rsc = preprocess(grammar(#Start));
list[ConversionUnit] units = [
unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("-"),lit(","),lit(")"),lit("("),lit("\n"),lit("\'"),lit("\<="),lit("}"),lit("\\"),lit("\>="),lit(";"),lit("{")})],{}), false, false, , ),
diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalTag.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalTag.rsc
index 7cf92fc..372c8dc 100644
--- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalTag.rsc
+++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalTag.rsc
@@ -11,6 +11,8 @@ import lang::textmate::ConversionUnit;
// Based on `lang::rascal::\syntax::Rascal`
+start syntax Start = Tag;
+
syntax Tag
= @Folded @category="Comment" \default : "@" Name name TagString contents
| @Folded @category="Comment" empty : "@" Name name
@@ -37,7 +39,7 @@ lexical LAYOUT
layout LAYOUTLIST
= LAYOUT* !>> [\u0009-\u000D \u0020 \u0085 \u00A0 \u1680 \u180E \u2000-\u200A \u2028 \u2029 \u202F \u205F \u3000] /* !>> "//" !>> "/*" */;
-Grammar rsc = preprocess(grammar(#Tag));
+Grammar rsc = preprocess(grammar(#Start));
list[ConversionUnit] units = [
unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("="),lit("\\"),lit(";"),lit("{")})],{}), false, false, , ),