From bf4c3d99f761d5cb79ec59e2e92d1524ad06a496 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sun, 3 Nov 2024 11:35:15 +0100 Subject: [PATCH 1/3] Implemented and disabled prefix-sharing support when finding end matcher Disabled because in practice this seems to costs a lot of performance for very little gain. --- .../uptr/recovery/ToTokenRecoverer.java | 109 ++++++++++-------- 1 file changed, 60 insertions(+), 49 deletions(-) diff --git a/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java b/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java index 307c71838f..10c566cfca 100644 --- a/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java +++ b/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java @@ -27,6 +27,7 @@ import org.rascalmpl.parser.gtd.stack.CaseInsensitiveLiteralStackNode; import org.rascalmpl.parser.gtd.stack.EmptyStackNode; import org.rascalmpl.parser.gtd.stack.EpsilonStackNode; +import org.rascalmpl.parser.gtd.stack.ListStackNode; import org.rascalmpl.parser.gtd.stack.LiteralStackNode; import org.rascalmpl.parser.gtd.stack.NonTerminalStackNode; import org.rascalmpl.parser.gtd.stack.RecoveryPointStackNode; @@ -42,6 +43,7 @@ import org.rascalmpl.parser.gtd.util.Stack; import org.rascalmpl.parser.uptr.recovery.InputMatcher.MatchResult; import org.rascalmpl.parser.util.ParseStateVisualizer; +import org.rascalmpl.values.RascalValueFactory; import org.rascalmpl.values.parsetrees.ProductionAdapter; import io.usethesource.vallang.IConstructor; @@ -143,17 +145,10 @@ private List> findSkippingNodes(int[] input, int result = SkippingStackNode.createResultUntilEndOfInput(uri, input, startLocation); nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation)); return nodes; // No other nodes would be useful - } - - // If we are the top-level node, just skip the rest of the input - if (!recoveryNode.isEndNode() && isTopLevelProduction(recoveryNode)) { - result = SkippingStackNode.createResultUntilEndOfInput(uri, input, startLocation); - nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation)); - return nodes; // No other nodes would be useful } // Find the last token of this production and skip until after that - List endMatchers = findEndMatchers(recoveryNode); + List endMatchers = findEndMatchers(recoveryNode.getProduction()[recoveryNode.getDot()]); for (InputMatcher endMatcher : endMatchers) { // For now take a very large (basically unlimited) "max match length", experiment with smaller limit later MatchResult endMatch = endMatcher.findMatch(input, startLocation, Integer.MAX_VALUE/2); @@ -182,7 +177,7 @@ private List findEndMatchers(AbstractStackNode stack final List matchers = new java.util.ArrayList<>(); AbstractStackNode[] prod = stackNode.getProduction(); - addEndMatchers(prod, prod.length-1, matchers, new HashSet<>()); + addPrefixSharedEndMatchers(prod, stackNode.getDot(), matchers, new HashSet<>()); IConstructor parentProduction = stackNode.getParentProduction(); if (parentProduction != null && ProductionAdapter.isContextFree(parentProduction)) { @@ -191,18 +186,61 @@ private List findEndMatchers(AbstractStackNode stack return matchers; } - - private void addEndMatchers(AbstractStackNode[] prod, int dot, List matchers, - Set visitedNodes) { + + @SuppressWarnings("java:S5125") // We purposely commented out some code + private void addPrefixSharedEndMatchers(AbstractStackNode[] prod, int dot, List matchers, Set visitedNodes) { if (prod == null || dot < 0 || dot >= prod.length) { return; } - AbstractStackNode last = prod[dot]; - if (visitedNodes.contains(last.getId())) { + for (int i=dot; i last = prod[i]; + + if (visitedNodes.contains(last.getId())) { + continue; + } + visitedNodes.add(last.getId()); + + if (maybeEndNode(prod, i)) { + addEndMatchers(prod, i, matchers, visitedNodes); + } + + // Although also following prefix shared productions to find end matchers helps + // in contrived examples (see PrefixSharingRecoveryTest.rsc), it does not significantly improve recovery + // quality in practice while slowing down error recovery considerably. + // So we have disabled this for now, but we may visit this topic in the future. + + /*AbstractStackNode[][] alternateProductions = last.getAlternateProductions(); + if (alternateProductions != null) { + for (AbstractStackNode[] alternateProduction : alternateProductions) { + addPrefixSharedEndMatchers(alternateProduction, dot, matchers, visitedNodes); + } + } + */ + } + } + + private boolean maybeEndNode(AbstractStackNode[] prod, int dot) { + while (dot < prod.length) { + if (prod[dot].isEndNode()) { + return true; + } + if (!isNullable(prod[dot])) { + return false; + } + + dot = dot + 1; + } + + throw new AssertionError("No end node found?"); // Should not happen, last node is always an end node + } + + private void addEndMatchers(AbstractStackNode[] prod, int dot, List matchers, Set visitedNodes) { + if (prod == null || dot < 0 || dot >= prod.length) { return; } - visitedNodes.add(last.getId()); + + AbstractStackNode last = prod[dot]; if (isNullable(last)) { addEndMatchers(prod, dot-1, matchers, visitedNodes); @@ -226,10 +264,13 @@ public Void visit(NonTerminalStackNode nonTerminal) { String name = nonTerminal.getName(); AbstractStackNode[] alternatives = expectsProvider.getExpects(name); for (AbstractStackNode alternative : alternatives) { - addEndMatchers(alternative.getProduction(), 0, matchers, visitedNodes); + AbstractStackNode[] children = alternative.getProduction(); + addPrefixSharedEndMatchers(children, children.length-1, matchers, visitedNodes); } return null; } + + // TODO: list of characters so we can match things like identifiers }); } @@ -302,7 +343,9 @@ public Void visit(NonTerminalStackNode nonTerminal) { } return null; - } + } + + // TODO: list of characters so we can match things like identifiers }); } @@ -322,38 +365,6 @@ private boolean isNullable(AbstractStackNode stackNode) { return false; } - // Check if a node is a top-level production (i.e., its parent production node has no parents and - // starts at position -1) - // As this is experimental code, this method is extremely conservative. - // Any sharing will result in returning 'false'. - // We will need to change this strategy in the future to improve error recovery. - private boolean isTopLevelProduction(AbstractStackNode node) { - - while (node != null && node.getDot() != 0) { - node = getSinglePredecessor(node); - } - - if (node != null) { - node = getSinglePredecessor(node); - return node != null && node.getStartLocation() == -1; - } - - return false; - } - - private AbstractStackNode getSinglePredecessor(AbstractStackNode node) { - IntegerObjectList> edgeMap = node.getEdges(); - if (edgeMap.size() == 1) { - EdgesSet edges = edgeMap.getValue(0); - if (edges.size() == 1) { - return edges.get(0); - } - } - - return null; - } - - private DoubleArrayList, AbstractNode> reviveFailedNodes( int[] input, int location, From f708b01abd8e91b9cbb7f94e189e8aae4c756f94 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sun, 3 Nov 2024 11:41:35 +0100 Subject: [PATCH 2/3] Added prefix sharing test --- .../recovery/PrefixSharingRecoveryTest.rsc | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PrefixSharingRecoveryTest.rsc diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PrefixSharingRecoveryTest.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PrefixSharingRecoveryTest.rsc new file mode 100644 index 0000000000..97792dd0d9 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PrefixSharingRecoveryTest.rsc @@ -0,0 +1,19 @@ +module lang::rascal::tests::concrete::recovery::PrefixSharingRecoveryTest + +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; + +layout Layout = [\ ]* !>> [\ ]; + +start syntax Prog = Expr Expr; + +syntax Expr = [s] "+" "1" + | [s] "+" "2"; + +test bool prefixSharedOk() = checkRecovery(#Prog, "s+2s+1", []); + +// When taking prefix sharing into account when determining end matchers in the error recover, +// prefixSharedError1() and 2 will succeed. +// However, this costs considerable performance for very little gain in practice, +// so for now we have disabled the prefix sharing support in `addPrefixSharedEndMatchers` +//test bool prefixSharedError1() = checkRecovery(#Prog, "s-1s+1", ["-1"], visualize=false); +test bool prefixSharedError2() = checkRecovery(#Prog, "s-2s+1", ["-2"], visualize=false); From 7e3f4f4b914fd3e0726cb23e40da084a145556b7 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sun, 3 Nov 2024 12:21:44 +0100 Subject: [PATCH 3/3] Simplified "isTopLevelProduction" implementation --- .../parser/uptr/recovery/ToTokenRecoverer.java | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java b/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java index 10c566cfca..14125251d3 100644 --- a/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java +++ b/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java @@ -147,6 +147,13 @@ private List> findSkippingNodes(int[] input, int return nodes; // No other nodes would be useful } + // If we are the top-level node, just skip the rest of the input + if (!recoveryNode.isEndNode() && isTopLevelProduction(recoveryNode)) { + result = SkippingStackNode.createResultUntilEndOfInput(uri, input, startLocation); + nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation)); + return nodes; // No other nodes would be useful + } + // Find the last token of this production and skip until after that List endMatchers = findEndMatchers(recoveryNode.getProduction()[recoveryNode.getDot()]); for (InputMatcher endMatcher : endMatchers) { @@ -234,7 +241,13 @@ private boolean maybeEndNode(AbstractStackNode[] prod, int dot) { throw new AssertionError("No end node found?"); // Should not happen, last node is always an end node } - + + // Check if a node is a top-level production (i.e., its parent production node has no parents and + // starts at position -1) + private boolean isTopLevelProduction(AbstractStackNode node) { + return node.getProduction()[0].getStartLocation() == -1; + } + private void addEndMatchers(AbstractStackNode[] prod, int dot, List matchers, Set visitedNodes) { if (prod == null || dot < 0 || dot >= prod.length) { return;