usethesource · PieterOlivier · Nov 3, 2024 · Nov 3, 2024 · Nov 3, 2024
diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PrefixSharingRecoveryTest.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PrefixSharingRecoveryTest.rsc
@@ -0,0 +1,19 @@
+module lang::rascal::tests::concrete::recovery::PrefixSharingRecoveryTest
+
+import lang::rascal::tests::concrete::recovery::RecoveryTestSupport;
+
+layout Layout = [\ ]* !>> [\ ];
+
+start syntax Prog = Expr Expr;
+
+syntax Expr = [s] "+" "1"
+    | [s] "+" "2";
+
+test bool prefixSharedOk() = checkRecovery(#Prog, "s+2s+1", []);
+
+// When taking prefix sharing into account when determining end matchers in the error recover,
+// prefixSharedError1()  and 2 will succeed.
+// However, this costs considerable performance for very little gain in practice,
+// so for now we have disabled the prefix sharing support in `addPrefixSharedEndMatchers` 
+//test bool prefixSharedError1() = checkRecovery(#Prog, "s-1s+1", ["-1"], visualize=false);
+test bool prefixSharedError2() = checkRecovery(#Prog, "s-2s+1", ["-2"], visualize=false);
diff --git a/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java b/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java
@@ -27,6 +27,7 @@
 import org.rascalmpl.parser.gtd.stack.CaseInsensitiveLiteralStackNode;
 import org.rascalmpl.parser.gtd.stack.EmptyStackNode;
 import org.rascalmpl.parser.gtd.stack.EpsilonStackNode;
+import org.rascalmpl.parser.gtd.stack.ListStackNode;
 import org.rascalmpl.parser.gtd.stack.LiteralStackNode;
 import org.rascalmpl.parser.gtd.stack.NonTerminalStackNode;
 import org.rascalmpl.parser.gtd.stack.RecoveryPointStackNode;
@@ -42,6 +43,7 @@
 import org.rascalmpl.parser.gtd.util.Stack;
 import org.rascalmpl.parser.uptr.recovery.InputMatcher.MatchResult;
 import org.rascalmpl.parser.util.ParseStateVisualizer;
+import org.rascalmpl.values.RascalValueFactory;
 import org.rascalmpl.values.parsetrees.ProductionAdapter;
 
 import io.usethesource.vallang.IConstructor;
@@ -143,17 +145,17 @@ private List<SkippingStackNode<IConstructor>> findSkippingNodes(int[] input, int
 			result = SkippingStackNode.createResultUntilEndOfInput(uri, input, startLocation);
 			nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation));
 			return nodes; // No other nodes would be useful
-			}
+		}
 
 		// If we are the top-level node, just skip the rest of the input
 		if (!recoveryNode.isEndNode() && isTopLevelProduction(recoveryNode)) {
 			result = SkippingStackNode.createResultUntilEndOfInput(uri, input, startLocation);
 			nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation));
-			return nodes;	// No other nodes would be useful
+			return nodes; // No other nodes would be useful
 		}
 
 		// Find the last token of this production and skip until after that
-		List<InputMatcher> endMatchers = findEndMatchers(recoveryNode);
+		List<InputMatcher> endMatchers = findEndMatchers(recoveryNode.getProduction()[recoveryNode.getDot()]);
 		for (InputMatcher endMatcher : endMatchers) {
 			// For now take a very large (basically unlimited) "max match length", experiment with smaller limit later
 			MatchResult endMatch = endMatcher.findMatch(input, startLocation, Integer.MAX_VALUE/2);
@@ -182,7 +184,7 @@ private List<InputMatcher> findEndMatchers(AbstractStackNode<IConstructor> stack
 		final List<InputMatcher> matchers = new java.util.ArrayList<>();
 
 		AbstractStackNode<IConstructor>[] prod = stackNode.getProduction();
-		addEndMatchers(prod, prod.length-1, matchers, new HashSet<>());
+		addPrefixSharedEndMatchers(prod, stackNode.getDot(), matchers, new HashSet<>());
 
 		IConstructor parentProduction = stackNode.getParentProduction();
 		if (parentProduction != null && ProductionAdapter.isContextFree(parentProduction)) {
@@ -191,18 +193,67 @@ private List<InputMatcher> findEndMatchers(AbstractStackNode<IConstructor> stack
 
 		return matchers;
 	}
-	
-	private void addEndMatchers(AbstractStackNode<IConstructor>[] prod, int dot, List<InputMatcher> matchers,
-		Set<Integer> visitedNodes) {
+
+	@SuppressWarnings("java:S5125") // We purposely commented out some code
+	private void addPrefixSharedEndMatchers(AbstractStackNode<IConstructor>[] prod, int dot, List<InputMatcher> matchers, Set<Integer> visitedNodes) {
 		if (prod == null || dot < 0 || dot >= prod.length) {
 			return;
 		}
 
-		AbstractStackNode<IConstructor> last = prod[dot];
-		if (visitedNodes.contains(last.getId())) {
+		for (int i=dot; i<prod.length; i++) {
+			AbstractStackNode<IConstructor> last = prod[i];
+
+			if (visitedNodes.contains(last.getId())) {
+				continue;
+			}
+			visitedNodes.add(last.getId());
+
+			if (maybeEndNode(prod, i)) {
+				addEndMatchers(prod, i, matchers, visitedNodes);
+			}
+
+			// Although also following prefix shared productions to find end matchers helps
+			// in contrived examples (see PrefixSharingRecoveryTest.rsc), it does not significantly improve recovery
+			// quality in practice while slowing down error recovery considerably.
+			// So we have disabled this for now, but we may visit this topic in the future.
+
+			/*AbstractStackNode<IConstructor>[][] alternateProductions = last.getAlternateProductions();
+			if (alternateProductions != null) {
+				for (AbstractStackNode<IConstructor>[] alternateProduction : alternateProductions) {
+					addPrefixSharedEndMatchers(alternateProduction, dot, matchers, visitedNodes);
+				}
+			}
+			*/
+		}
+	}
+
+	private boolean maybeEndNode(AbstractStackNode<IConstructor>[] prod, int dot) {
+		while (dot < prod.length) {
+			if (prod[dot].isEndNode()) {
+				return true;
+			}
+			if (!isNullable(prod[dot])) {
+				return false;
+			}
+
+			dot = dot + 1;
+		}
+
+		throw new AssertionError("No end node found?"); // Should not happen, last node is always an end node
+	}
+
+	// Check if a node is a top-level production (i.e., its parent production node has no parents and
+	// starts at position -1)
+	private boolean isTopLevelProduction(AbstractStackNode<IConstructor> node) {
+		return node.getProduction()[0].getStartLocation() == -1;
+	}
+
+	private void addEndMatchers(AbstractStackNode<IConstructor>[] prod, int dot, List<InputMatcher> matchers, Set<Integer> visitedNodes) {
+		if (prod == null || dot < 0 || dot >= prod.length) {
 			return;
 		}
-		visitedNodes.add(last.getId());
+
+		AbstractStackNode<IConstructor> last = prod[dot];
 
 		if (isNullable(last)) {
 			addEndMatchers(prod, dot-1, matchers, visitedNodes);
@@ -226,10 +277,13 @@ public Void visit(NonTerminalStackNode<IConstructor> nonTerminal) {
 				String name = nonTerminal.getName();
 				AbstractStackNode<IConstructor>[] alternatives = expectsProvider.getExpects(name);
 				for (AbstractStackNode<IConstructor> alternative : alternatives) {
-					addEndMatchers(alternative.getProduction(), 0, matchers, visitedNodes);
+					AbstractStackNode<IConstructor>[] children = alternative.getProduction();
+					addPrefixSharedEndMatchers(children, children.length-1, matchers, visitedNodes);
 				}
 				return null;
 			}
+
+			// TODO: list of characters so we can match things like identifiers
 		});
 	}
 
@@ -302,7 +356,9 @@ public Void visit(NonTerminalStackNode<IConstructor> nonTerminal) {
 				}
 
 				return null;
-		}
+			}
+
+			// TODO: list of characters so we can match things like identifiers
 		});
 	}
 
@@ -322,38 +378,6 @@ private boolean isNullable(AbstractStackNode<IConstructor> stackNode) {
 		return false;
 	}
 
-	// Check if a node is a top-level production (i.e., its parent production node has no parents and
-	// starts at position -1)
-	// As this is experimental code, this method is extremely conservative.
-	// Any sharing will result in returning 'false'.
-	// We will need to change this strategy in the future to improve error recovery.
-	private boolean isTopLevelProduction(AbstractStackNode<IConstructor> node) {
-
-		while (node != null && node.getDot() != 0) {
-			node = getSinglePredecessor(node);
-		}
-
-		if (node != null) {
-			node = getSinglePredecessor(node);
-			return node != null && node.getStartLocation() == -1;
-		}
-
-		return false;
-	}
-
-	private AbstractStackNode<IConstructor> getSinglePredecessor(AbstractStackNode<IConstructor> node) {
-		IntegerObjectList<EdgesSet<IConstructor>> edgeMap = node.getEdges();
-		if (edgeMap.size() == 1) {
-			EdgesSet<IConstructor> edges = edgeMap.getValue(0);
-			if (edges.size() == 1) {
-				return edges.get(0);
-			}
-		}
-
-		return null;
-	}
-
-
     private DoubleArrayList<AbstractStackNode<IConstructor>, AbstractNode> reviveFailedNodes(
 		int[] input,
 		int location,