From bf4c3d99f761d5cb79ec59e2e92d1524ad06a496 Mon Sep 17 00:00:00 2001
From: Pieter Olivier <pieter.olivier@swat.engineering>
Date: Sun, 3 Nov 2024 11:35:15 +0100
Subject: [PATCH 1/3] Implemented and disabled prefix-sharing support when
 finding end matcher

Disabled because in practice this seems to costs a lot of performance
for very little gain.
---
 .../uptr/recovery/ToTokenRecoverer.java       | 109 ++++++++++--------
 1 file changed, 60 insertions(+), 49 deletions(-)
diff --git a/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java b/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java
index 307c71838f..10c566cfca 100644
--- a/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java
+++ b/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java
@@ -27,6 +27,7 @@
 import org.rascalmpl.parser.gtd.stack.CaseInsensitiveLiteralStackNode;
 import org.rascalmpl.parser.gtd.stack.EmptyStackNode;
 import org.rascalmpl.parser.gtd.stack.EpsilonStackNode;
+import org.rascalmpl.parser.gtd.stack.ListStackNode;
 import org.rascalmpl.parser.gtd.stack.LiteralStackNode;
 import org.rascalmpl.parser.gtd.stack.NonTerminalStackNode;
 import org.rascalmpl.parser.gtd.stack.RecoveryPointStackNode;
@@ -42,6 +43,7 @@
 import org.rascalmpl.parser.gtd.util.Stack;
 import org.rascalmpl.parser.uptr.recovery.InputMatcher.MatchResult;
 import org.rascalmpl.parser.util.ParseStateVisualizer;
+import org.rascalmpl.values.RascalValueFactory;
 import org.rascalmpl.values.parsetrees.ProductionAdapter;
 
 import io.usethesource.vallang.IConstructor;
@@ -143,17 +145,10 @@ private List<SkippingStackNode<IConstructor>> findSkippingNodes(int[] input, int
 			result = SkippingStackNode.createResultUntilEndOfInput(uri, input, startLocation);
 			nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation));
 			return nodes; // No other nodes would be useful
-			}
-
-		// If we are the top-level node, just skip the rest of the input
-		if (!recoveryNode.isEndNode() && isTopLevelProduction(recoveryNode)) {
-			result = SkippingStackNode.createResultUntilEndOfInput(uri, input, startLocation);
-			nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation));
-			return nodes;	// No other nodes would be useful
 		}
 
 		// Find the last token of this production and skip until after that
-		List<InputMatcher> endMatchers = findEndMatchers(recoveryNode);
+		List<InputMatcher> endMatchers = findEndMatchers(recoveryNode.getProduction()[recoveryNode.getDot()]);
 		for (InputMatcher endMatcher : endMatchers) {
 			// For now take a very large (basically unlimited) "max match length", experiment with smaller limit later
 			MatchResult endMatch = endMatcher.findMatch(input, startLocation, Integer.MAX_VALUE/2);
@@ -182,7 +177,7 @@ private List<InputMatcher> findEndMatchers(AbstractStackNode<IConstructor> stack
 		final List<InputMatcher> matchers = new java.util.ArrayList<>();
 
 		AbstractStackNode<IConstructor>[] prod = stackNode.getProduction();
-		addEndMatchers(prod, prod.length-1, matchers, new HashSet<>());
+		addPrefixSharedEndMatchers(prod, stackNode.getDot(), matchers, new HashSet<>());
 
 		IConstructor parentProduction = stackNode.getParentProduction();
 		if (parentProduction != null && ProductionAdapter.isContextFree(parentProduction)) {
@@ -191,18 +186,61 @@ private List<InputMatcher> findEndMatchers(AbstractStackNode<IConstructor> stack
 
 		return matchers;
 	}
-	
-	private void addEndMatchers(AbstractStackNode<IConstructor>[] prod, int dot, List<InputMatcher> matchers,
-		Set<Integer> visitedNodes) {
+
+	@SuppressWarnings("java:S5125") // We purposely commented out some code
+	private void addPrefixSharedEndMatchers(AbstractStackNode<IConstructor>[] prod, int dot, List<InputMatcher> matchers, Set<Integer> visitedNodes) {
 		if (prod == null || dot < 0 || dot >= prod.length) {
 			return;
 		}
 
-		AbstractStackNode<IConstructor> last = prod[dot];
-		if (visitedNodes.contains(last.getId())) {
+		for (int i=dot; i<prod.length; i++) {
+			AbstractStackNode<IConstructor> last = prod[i];
+
+			if (visitedNodes.contains(last.getId())) {
+				continue;
+			}
+			visitedNodes.add(last.getId());
+
+			if (maybeEndNode(prod, i)) {
+				addEndMatchers(prod, i, matchers, visitedNodes);
+			}
+
+			// Although also following prefix shared productions to find end matchers helps
+			// in contrived examples (see PrefixSharingRecoveryTest.rsc), it does not significantly improve recovery
+			// quality in practice while slowing down error recovery considerably.
+			// So we have disabled this for now, but we may visit this topic in the future.
+
+			/*AbstractStackNode<IConstructor>[][] alternateProductions = last.getAlternateProductions();
+			if (alternateProductions != null) {
+				for (AbstractStackNode<IConstructor>[] alternateProduction : alternateProductions) {
+					addPrefixSharedEndMatchers(alternateProduction, dot, matchers, visitedNodes);
+				}
+			}
+			*/
+		}
+	}
+
+	private boolean maybeEndNode(AbstractStackNode<IConstructor>[] prod, int dot) {
+		while (dot < prod.length) {
+			if (prod[dot].isEndNode()) {
+				return true;
+			}
+			if (!isNullable(prod[dot])) {
+				return false;
+			}
+
+			dot = dot + 1;
+		}
+
+		throw new AssertionError("No end node found?"); // Should not happen, last node is always an end node
+	}
+	
+	private void addEndMatchers(AbstractStackNode<IConstructor>[] prod, int dot, List<InputMatcher> matchers, Set<Integer> visitedNodes) {
+		if (prod == null || dot < 0 || dot >= prod.length) {
 			return;
 		}
-		visitedNodes.add(last.getId());
+
+		AbstractStackNode<IConstructor> last = prod[dot];
 
 		if (isNullable(last)) {
 			addEndMatchers(prod, dot-1, matchers, visitedNodes);
@@ -226,10 +264,13 @@ public Void visit(NonTerminalStackNode<IConstructor> nonTerminal) {
 				String name = nonTerminal.getName();
 				AbstractStackNode<IConstructor>[] alternatives = expectsProvider.getExpects(name);
 				for (AbstractStackNode<IConstructor> alternative : alternatives) {
-					addEndMatchers(alternative.getProduction(), 0, matchers, visitedNodes);
+					AbstractStackNode<IConstructor>[] children = alternative.getProduction();
+					addPrefixSharedEndMatchers(children, children.length-1, matchers, visitedNodes);
 				}
 				return null;
 			}
+
+			// TODO: list of characters so we can match things like identifiers
 		});
 	}
 
@@ -302,7 +343,9 @@ public Void visit(NonTerminalStackNode<IConstructor> nonTerminal) {
 				}
 
 				return null;
-		}
+			}
+
+			// TODO: list of characters so we can match things like identifiers
 		});
 	}
 
@@ -322,38 +365,6 @@ private boolean isNullable(AbstractStackNode<IConstructor> stackNode) {
 		return false;
 	}
 
-	// Check if a node is a top-level production (i.e., its parent production node has no parents and
-	// starts at position -1)
-	// As this is experimental code, this method is extremely conservative.
-	// Any sharing will result in returning 'false'.
-	// We will need to change this strategy in the future to improve error recovery.
-	private boolean isTopLevelProduction(AbstractStackNode<IConstructor> node) {
-
-		while (node != null && node.getDot() != 0) {
-			node = getSinglePredecessor(node);
-		}
-
-		if (node != null) {
-			node = getSinglePredecessor(node);
-			return node != null && node.getStartLocation() == -1;
-		}
-
-		return false;
-	}
-
-	private AbstractStackNode<IConstructor> getSinglePredecessor(AbstractStackNode<IConstructor> node) {
-		IntegerObjectList<EdgesSet<IConstructor>> edgeMap = node.getEdges();
-		if (edgeMap.size() == 1) {
-			EdgesSet<IConstructor> edges = edgeMap.getValue(0);
-			if (edges.size() == 1) {
-				return edges.get(0);
-			}
-		}
-
-		return null;
-	}
-	
-
     private DoubleArrayList<AbstractStackNode<IConstructor>, AbstractNode> reviveFailedNodes(
 		int[] input,
 		int location,

From f708b01abd8e91b9cbb7f94e189e8aae4c756f94 Mon Sep 17 00:00:00 2001
From: Pieter Olivier <pieter.olivier@swat.engineering>
Date: Sun, 3 Nov 2024 11:41:35 +0100
Subject: [PATCH 2/3] Added prefix sharing test

---
 .../recovery/PrefixSharingRecoveryTest.rsc    | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PrefixSharingRecoveryTest.rsc

diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PrefixSharingRecoveryTest.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PrefixSharingRecoveryTest.rsc
new file mode 100644
index 0000000000..97792dd0d9
--- /dev/null
+++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PrefixSharingRecoveryTest.rsc
@@ -0,0 +1,19 @@
+module lang::rascal::tests::concrete::recovery::PrefixSharingRecoveryTest
+
+import lang::rascal::tests::concrete::recovery::RecoveryTestSupport;
+
+layout Layout = [\ ]* !>> [\ ];
+
+start syntax Prog = Expr Expr;
+
+syntax Expr = [s] "+" "1"
+    | [s] "+" "2";
+
+test bool prefixSharedOk() = checkRecovery(#Prog, "s+2s+1", []);
+
+// When taking prefix sharing into account when determining end matchers in the error recover,
+// prefixSharedError1()  and 2 will succeed.
+// However, this costs considerable performance for very little gain in practice,
+// so for now we have disabled the prefix sharing support in `addPrefixSharedEndMatchers` 
+//test bool prefixSharedError1() = checkRecovery(#Prog, "s-1s+1", ["-1"], visualize=false);
+test bool prefixSharedError2() = checkRecovery(#Prog, "s-2s+1", ["-2"], visualize=false);

From 7e3f4f4b914fd3e0726cb23e40da084a145556b7 Mon Sep 17 00:00:00 2001
From: Pieter Olivier <pieter.olivier@swat.engineering>
Date: Sun, 3 Nov 2024 12:21:44 +0100
Subject: [PATCH 3/3] Simplified "isTopLevelProduction" implementation

---
 .../parser/uptr/recovery/ToTokenRecoverer.java    | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java b/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java
index 10c566cfca..14125251d3 100644
--- a/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java
+++ b/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java
@@ -147,6 +147,13 @@ private List<SkippingStackNode<IConstructor>> findSkippingNodes(int[] input, int
 			return nodes; // No other nodes would be useful
 		}
 
+		// If we are the top-level node, just skip the rest of the input
+		if (!recoveryNode.isEndNode() && isTopLevelProduction(recoveryNode)) {
+			result = SkippingStackNode.createResultUntilEndOfInput(uri, input, startLocation);
+			nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation));
+			return nodes; // No other nodes would be useful
+		}
+
 		// Find the last token of this production and skip until after that
 		List<InputMatcher> endMatchers = findEndMatchers(recoveryNode.getProduction()[recoveryNode.getDot()]);
 		for (InputMatcher endMatcher : endMatchers) {
@@ -234,7 +241,13 @@ private boolean maybeEndNode(AbstractStackNode<IConstructor>[] prod, int dot) {
 
 		throw new AssertionError("No end node found?"); // Should not happen, last node is always an end node
 	}
-	
+
+	// Check if a node is a top-level production (i.e., its parent production node has no parents and
+	// starts at position -1)
+	private boolean isTopLevelProduction(AbstractStackNode<IConstructor> node) {
+		return node.getProduction()[0].getStartLocation() == -1;
+	}
+
 	private void addEndMatchers(AbstractStackNode<IConstructor>[] prod, int dot, List<InputMatcher> matchers, Set<Integer> visitedNodes) {
 		if (prod == null || dot < 0 || dot >= prod.length) {
 			return;