diff --git a/src/SIL.Machine.Morphology.HermitCrab/Allomorph.cs b/src/SIL.Machine.Morphology.HermitCrab/Allomorph.cs index 0c06cdf1d..fbb32db72 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/Allomorph.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/Allomorph.cs @@ -72,6 +72,11 @@ public IDictionary Properties get { return _properties; } } + /// + /// Was this allomorph guessed by a lexical pattern? + /// + public bool Guessed { get; set; } + public bool FreeFluctuatesWith(Allomorph other) { if (this == other) diff --git a/src/SIL.Machine.Morphology.HermitCrab/AnalysisStratumRule.cs b/src/SIL.Machine.Morphology.HermitCrab/AnalysisStratumRule.cs index 88c06984e..97c31c2e4 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/AnalysisStratumRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/AnalysisStratumRule.cs @@ -90,6 +90,8 @@ public IEnumerable Apply(Word input) output.Add(mruleOutWord); if (_morpher.TraceManager.IsTracing) _morpher.TraceManager.EndUnapplyStratum(_stratum, mruleOutWord); + if (_morpher.MaxUnapplications > 0 && output.Count >= _morpher.MaxUnapplications) + break; } return output; } diff --git a/src/SIL.Machine.Morphology.HermitCrab/CharacterDefinitionTable.cs b/src/SIL.Machine.Morphology.HermitCrab/CharacterDefinitionTable.cs index f8b9b3543..2854481e2 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/CharacterDefinitionTable.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/CharacterDefinitionTable.cs @@ -14,11 +14,13 @@ public class CharacterDefinitionTable : ICollection { private readonly Dictionary _charDefLookup; private readonly HashSet _charDefs; + private readonly Dictionary _naturalClassLookup; public CharacterDefinitionTable() { _charDefLookup = new Dictionary(); _charDefs = new HashSet(); + _naturalClassLookup = new Dictionary(); } public string Name { get; set; } @@ -43,6 +45,11 @@ public CharacterDefinition AddBoundary(IEnumerable strRep) return Add(strRep, HCFeatureSystem.Boundary, null); } + public void AddNaturalClass(NaturalClass 
naturalClass) + { + _naturalClassLookup[naturalClass.Name] = naturalClass; + } + /// /// Adds the character definition. /// @@ -98,11 +105,14 @@ public IEnumerable GetMatchingStrReps(ShapeNode node) } } - private bool GetShapeNodes(string str, out IEnumerable nodes, out int errorPos) + private bool GetShapeNodes(string str, bool allowPattern, out IEnumerable nodes, out int errorPos) { var nodesList = new List(); int i = 0; string normalized = str.Normalize(NormalizationForm.FormD); + bool optional = false; + int optionalPos = 0; + int optionalCount = 0; while (i < normalized.Length) { bool match = false; @@ -120,15 +130,88 @@ private bool GetShapeNodes(string str, out IEnumerable nodes, out int break; } } - - if (!match) + if (match) + continue; + if (allowPattern) { - nodes = null; - errorPos = i; - if (!str.IsNormalized(NormalizationForm.FormD)) - errorPos = normalized.Substring(0, errorPos).Normalize().Length; - return false; + // Check for pattern language. + // NB: This only happens when the characters don't match. + // I thought about implementing this using Pattern, + // but the Matcher doesn't preserve the unifications of the nodes. + if (normalized[i] == '[') + { + // Example: [Seg]. + // Look for a natural class. + int closePos = normalized.IndexOf("]", i); + if (closePos > 0) + { + string className = normalized.Substring(i + 1, closePos - i - 1); + if (_naturalClassLookup.ContainsKey(className)) + { + NaturalClass naturalClass = _naturalClassLookup[className]; + var node = new ShapeNode(naturalClass.FeatureStruct); + nodesList.Add(node); + i = closePos + 1; + continue; + } + } + } + else if (normalized[i] == '(') + { + if (i + 1 < normalized.Length && normalized[i + 1] == '[') + { + // The natural class that follows is optional. + // Wait for the close parenthesis to process. 
+ optional = true; + optionalPos = i; + optionalCount = nodesList.Count; + i++; + continue; + } + } + else if (normalized[i] == ')') + { + if (optional && nodesList.Count == optionalCount + 1) + { + // Example: ([Seg]). + // Ill-formed: ([C][V]). + // Make the last node optional. + nodesList[nodesList.Count - 1].Annotation.Optional = true; + optional = false; + i++; + continue; + } + } + else if (normalized[i] == '*') + { + if (i > 0 && normalized[i - 1] == ']') + { + // Example: [Seg]*. + // Make the last node Kleene star. + nodesList[nodesList.Count - 1].Annotation.Optional = true; + nodesList[nodesList.Count - 1].SetIterative(true); + i++; + continue; + } + } + // Kleene plus doesn't work because '+' is a boundary marker. } + + // Failure + nodes = null; + errorPos = i; + if (!str.IsNormalized(NormalizationForm.FormD)) + errorPos = normalized.Substring(0, errorPos).Normalize().Length; + return false; + } + if (optional) + { + // The open parenthesis didn't get closed. + nodes = null; + errorPos = optionalPos; + if (!str.IsNormalized(NormalizationForm.FormD)) + errorPos = normalized.Substring(0, errorPos).Normalize().Length; + return false; } nodes = nodesList; errorPos = -1; @@ -136,10 +219,15 @@ private bool GetShapeNodes(string str, out IEnumerable nodes, out int } public Shape Segment(string str) + { + return Segment(str, false); + } + + public Shape Segment(string str, bool allowPattern) { IEnumerable nodes; int errorPos; - if (GetShapeNodes(str, out nodes, out errorPos)) + if (GetShapeNodes(str, allowPattern, out nodes, out errorPos)) { var shape = new Shape(begin => new ShapeNode( begin ? HCFeatureSystem.LeftSideAnchor : HCFeatureSystem.RightSideAnchor @@ -162,7 +250,7 @@ public int TrySegment(string str, out Shape shape) { IEnumerable nodes; int errorPos; - if (GetShapeNodes(str, out nodes, out errorPos)) + if (GetShapeNodes(str, true, out nodes, out errorPos)) { shape = new Shape(begin => new ShapeNode( begin ? 
HCFeatureSystem.LeftSideAnchor : HCFeatureSystem.RightSideAnchor diff --git a/src/SIL.Machine.Morphology.HermitCrab/HermitCrabExtensions.cs b/src/SIL.Machine.Morphology.HermitCrab/HermitCrabExtensions.cs index e9b81d0a3..bd05a1c74 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/HermitCrabExtensions.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/HermitCrabExtensions.cs @@ -88,6 +88,19 @@ internal static void SetDeleted(this ShapeNode node, bool deleted) ); } + internal static bool IsIterative(this ShapeNode node) + { + return node.Annotation.Data != null; + } + + internal static void SetIterative(this ShapeNode node, bool iterative) + { + if (iterative) + node.Annotation.Data = iterative; + else + node.Annotation.Data = null; + } + private static readonly IEqualityComparer NodeComparer = new ProjectionEqualityComparer< ShapeNode, FeatureStruct diff --git a/src/SIL.Machine.Morphology.HermitCrab/Morpher.cs b/src/SIL.Machine.Morphology.HermitCrab/Morpher.cs index 4e92c395c..c6c58a919 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/Morpher.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/Morpher.cs @@ -14,6 +14,7 @@ #if !SINGLE_THREADED using System.Collections.Concurrent; using System.Threading.Tasks; +using System.Text; #endif namespace SIL.Machine.Morphology.HermitCrab @@ -26,6 +27,7 @@ public class Morpher : IMorphologicalAnalyzer, IMorphologicalGenerator private readonly Dictionary _allomorphTries; private readonly ITraceManager _traceManager; private readonly ReadOnlyObservableCollection _morphemes; + private readonly IList _lexicalPatterns = new List(); public Morpher(ITraceManager traceManager, Language lang) { @@ -38,7 +40,12 @@ public Morpher(ITraceManager traceManager, Language lang) var allomorphs = new HashSet(stratum.Entries.SelectMany(entry => entry.Allomorphs)); var trie = new RootAllomorphTrie(ann => ann.Type() == HCFeatureSystem.Segment); foreach (RootAllomorph allomorph in allomorphs) - trie.Add(allomorph); + { + if (allomorph.IsPattern) + 
_lexicalPatterns.Add(allomorph); + else + trie.Add(allomorph); + } _allomorphTries[stratum] = trie; morphemes.AddRange(stratum.Entries); @@ -48,6 +55,7 @@ public Morpher(ITraceManager traceManager, Language lang) _analysisRule = lang.CompileAnalysisRule(this); _synthesisRule = lang.CompileSynthesisRule(this); MaxStemCount = 2; + MaxUnapplications = 0; LexEntrySelector = entry => true; RuleSelector = rule => true; @@ -63,6 +71,13 @@ public ITraceManager TraceManager public int MaxStemCount { get; set; } + /// + /// MaxUnapplications limits the number of unapplications to make it possible + /// to debug words that take 30 minutes to parse + /// because there are too many unapplications. 0 (the default) means no limit. + /// + public int MaxUnapplications { get; set; } + public Func LexEntrySelector { get; set; } public Func RuleSelector { get; set; } @@ -76,10 +91,19 @@ public Language Language /// public IEnumerable ParseWord(string word) { - return ParseWord(word, out _); + return ParseWord(word, out _, false); } public IEnumerable ParseWord(string word, out object trace) + { + return ParseWord(word, out trace, false); + } + + /// + /// Parse the specified surface form, possibly tracing the parse. + /// If there are no analyses and guessRoot is true, then guess the root. + /// + public IEnumerable ParseWord(string word, out object trace, bool guessRoot) { // convert the word to its phonetic shape Shape shape = _lang.SurfaceStratum.CharacterDefinitionTable.Segment(word); @@ -104,8 +128,30 @@ public IEnumerable ParseWord(string word, out object trace) File.WriteAllLines("analyses.txt", lines.OrderBy(l => l)); #endif + IList origAnalyses = guessRoot ? analyses.ToList() : null; + IList syntheses = Synthesize(word, analyses).ToList(); + if (guessRoot && syntheses.Count == 0) + { + // Guess roots when there are no results. 
+ List matches = new List(); + foreach (Word analysisWord in origAnalyses) + { + var lexicalGuesses = LexicalGuess(analysisWord).Distinct(); + foreach (Word synthesisWord in lexicalGuesses) + { + foreach (Word validWord in _synthesisRule.Apply(synthesisWord).Where(IsWordValid)) + { + if (IsMatch(word, validWord)) + matches.Add(validWord); + } + } + } - return Synthesize(word, analyses); + matches.Sort((x, y) => y.Morphs.Count().CompareTo(x.Morphs.Count())); + + return matches; + } + return syntheses; } /// @@ -309,6 +355,188 @@ LexEntry entry in SearchRootAllomorphs(input.Stratum, input.Shape) } } + /// + /// Match the input against lexical patterns and return matches. + /// + private IEnumerable LexicalGuess(Word input) + { + if (_traceManager.IsTracing) + _traceManager.LexicalLookup(input.Stratum, input); + CharacterDefinitionTable table = input.Stratum.CharacterDefinitionTable; + IEnumerable shapeNodes = input.Shape.GetNodes(input.Range); + HashSet shapeSet = new HashSet(); + foreach (RootAllomorph lexicalPattern in _lexicalPatterns) + { + IEnumerable shapePattern = lexicalPattern.Segments.Shape.GetNodes( + lexicalPattern.Segments.Shape.Range + ); + foreach (List match in MatchNodesWithPattern(shapeNodes.ToList(), shapePattern.ToList())) + { + IEnumerable shapeStrings = new List() { match.ToString(table, false) }; + // We could set shapeStrings to GetShapeStrings(match, table), + // but that produces spurious ambiguities that don't seem to have any value. + foreach (string shapeString in shapeStrings) + { + if (shapeSet.Contains(shapeString)) + // Avoid duplicates caused by multiple paths through pattern (e.g. ([Seg])([Seg])). + continue; + shapeSet.Add(shapeString); + // Create a root allomorph for the guess. 
+ var root = new RootAllomorph(new Segments(table, shapeString)) { Guessed = true }; + root.AllomorphCoOccurrenceRules.AddRange(lexicalPattern.AllomorphCoOccurrenceRules); + root.Environments.AddRange(lexicalPattern.Environments); + root.Properties.AddRange(lexicalPattern.Properties); + root.StemName = lexicalPattern.StemName; + root.IsBound = lexicalPattern.IsBound; + // Create a lexical entry to hold the root allomorph. + // (The root's Morpheme will point to the lexical entry.) + var lexEntry = new LexEntry + { + Id = shapeString, + Gloss = shapeString, + IsPartial = input.SyntacticFeatureStruct.IsEmpty, + SyntacticFeatureStruct = input.SyntacticFeatureStruct, + Stratum = input.Stratum, + }; + lexEntry.Allomorphs.Add(root); + // Point the root allomorph to the lexical pattern in FieldWorks. + if (lexicalPattern.Morpheme != null) + { + // Copy Morpheme fields. + Morpheme morpheme = lexicalPattern.Morpheme; + lexEntry.MorphemeCoOccurrenceRules.AddRange(morpheme.MorphemeCoOccurrenceRules); + lexEntry.Properties.AddRange(morpheme.Properties); + lexEntry.Stratum = morpheme.Stratum; + LexEntry patternEntry = morpheme as LexEntry; + if (patternEntry != null) + { + // Copy LexEntry fields (skipped when the morpheme is not a LexEntry). + lexEntry.MprFeatures = patternEntry.MprFeatures; + lexEntry.SyntacticFeatureStruct = patternEntry.SyntacticFeatureStruct; + lexEntry.IsPartial = patternEntry.IsPartial; + } + } + // Create a new word that uses the root allomorph. + Word newWord = input.Clone(); + newWord.RootAllomorph = root; + if (_traceManager.IsTracing) + _traceManager.SynthesizeWord(_lang, newWord); + newWord.Freeze(); + yield return newWord; + } + } + } + } + + /// + /// Match the shape nodes against the shape pattern. + /// This can produce multiple outputs if there is more than one path. + /// The outputs can be different because it unifies the nodes. 
+ /// + public IEnumerable> MatchNodesWithPattern( + IList nodes, + IList pattern, + int n = 0, + int p = 0, + bool obligatory = false, + List prefix = null + ) + { + var results = new List>(); + if (prefix == null) + prefix = new List(); + if (pattern.Count == p) + { + if (nodes.Count == n) + // We match because we are at the end of both the pattern and the nodes. + results.Add(prefix); + return results; + } + if (pattern[p].Annotation.Optional && !obligatory) + // Try skipping this item in the pattern. + results.AddRange(MatchNodesWithPattern(nodes, pattern, n, p + 1, false, prefix)); + if (nodes.Count == n) + { + // We fail to match because we are at the end of the nodes but not the pattern. + return results; + } + ShapeNode newNode = UnifyShapeNodes(nodes[n], pattern[p]); + if (newNode == null) + // We fail because the pattern didn't match the node here. + return results; + // Make a copy of prefix to avoid crosstalk and add newNode. + prefix = new List(prefix) { newNode }; + if (pattern[p].IsIterative()) + // Try using this item in the pattern again. + results.AddRange(MatchNodesWithPattern(nodes, pattern, n + 1, p, true, prefix)); + // Try the remainder of the nodes against the remainder of the pattern. + results.AddRange(MatchNodesWithPattern(nodes, pattern, n + 1, p + 1, false, prefix)); + return results; + } + + ShapeNode UnifyShapeNodes(ShapeNode node, ShapeNode pattern) + { + FeatureStruct fs = null; + node.Annotation.FeatureStruct.Unify(pattern.Annotation.FeatureStruct, out fs); + if (fs == null) + return null; + if (fs.ValueEquals(node.Annotation.FeatureStruct)) + return node; + return new ShapeNode(fs); + } + + private IEnumerable GetShapeStrings(IList nodes, CharacterDefinitionTable table) + { + IList strings = new List(); + if (nodes.Count == 0) + { + // We are at the end of the nodes. + strings.Add(""); + return strings; + } + + // Pop the first node. + ShapeNode node = nodes[0]; + nodes.RemoveAt(0); + + // Get suffixes. 
+ IEnumerable suffixes = GetShapeStrings(nodes, table); + if ((node.Annotation.Type() == HCFeatureSystem.Boundary) || node.IsDeleted()) + // Skip this node. + return suffixes; + IEnumerable strReps = table.GetMatchingStrReps(node); + if (!strReps.Any()) + // Skip this node. + return suffixes; + + // Get string reps with unique feature structures. + IList uniqueStrReps = new List(); + foreach (string strRep in strReps) + { + CharacterDefinition cd = table[strRep]; + bool found = false; + foreach (string uniqueStrRep in uniqueStrReps) + { + CharacterDefinition uniqueCd = table[uniqueStrRep]; + if (uniqueCd.FeatureStruct.ValueEquals(cd.FeatureStruct)) + { + found = true; + break; + } + } + if (!found) + uniqueStrReps.Add(strRep); + } + + // take the cross-product of uniqueStrReps and suffixes. + foreach (string uniqueStrRep in uniqueStrReps) + { + foreach (string suffix in suffixes) + strings.Add(uniqueStrRep + suffix); + } + return strings; + } + private bool IsWordValid(Word word) { if ( @@ -379,6 +607,18 @@ public IEnumerable AnalyzeWord(string word) } } + public IEnumerable AnalyzeWord(string word, bool guessRoot) + { + try + { + return ParseWord(word, out _, guessRoot).Select(CreateWordAnalysis); + } + catch (InvalidShapeException) + { + return Enumerable.Empty(); + } + } + private WordAnalysis CreateWordAnalysis(Word result) { int rootMorphemeIndex = -1; diff --git a/src/SIL.Machine.Morphology.HermitCrab/RootAllomorph.cs b/src/SIL.Machine.Morphology.HermitCrab/RootAllomorph.cs index 76d480bf3..bfc020eb0 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/RootAllomorph.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/RootAllomorph.cs @@ -1,4 +1,5 @@ using System.Linq; +using SIL.Machine.Annotations; namespace SIL.Machine.Morphology.HermitCrab { @@ -15,6 +16,16 @@ public class RootAllomorph : Allomorph public RootAllomorph(Segments segments) { _segments = segments; + foreach (ShapeNode node in _segments.Shape.GetNodes(_segments.Shape.Range)) + { + if ( + 
node.IsIterative() + || (node.Annotation.Optional && node.Annotation.Type() != HCFeatureSystem.Boundary) + ) + { + IsPattern = true; + } + } } /// @@ -29,6 +40,11 @@ public Segments Segments public bool IsBound { get; set; } + /// + /// Does this represent a lexical pattern (e.g. [Seg]*)? + /// + public bool IsPattern { get; private set; } + protected override bool ConstraintsEqual(Allomorph other) { if (!(other is RootAllomorph otherAllo)) diff --git a/src/SIL.Machine.Morphology.HermitCrab/Segments.cs b/src/SIL.Machine.Morphology.HermitCrab/Segments.cs index fa6152d83..dca5ee474 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/Segments.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/Segments.cs @@ -11,6 +11,9 @@ public class Segments public Segments(CharacterDefinitionTable table, string representation) : this(table, representation, table.Segment(representation)) { } + public Segments(CharacterDefinitionTable table, string representation, bool allowPattern) + : this(table, representation, table.Segment(representation, allowPattern)) { } + public Segments(CharacterDefinitionTable table, string representation, Shape shape) { _representation = representation; diff --git a/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageLoader.cs b/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageLoader.cs index cacb9b550..91801135a 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageLoader.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageLoader.cs @@ -498,7 +498,7 @@ private bool TryLoadLexEntry(XElement entryElem, CharacterDefinitionTable table, private RootAllomorph LoadRootAllomorph(XElement alloElem, CharacterDefinitionTable table) { var shapeStr = (string)alloElem.Element("PhoneticShape"); - Segments segments = new Segments(table, shapeStr); + Segments segments = new Segments(table, shapeStr, true); if (segments.Shape.All(n => n.Type() == HCFeatureSystem.Boundary)) throw new InvalidShapeException(shapeStr, 0); var allomorph = new RootAllomorph(segments) { 
IsBound = (bool?)alloElem.Attribute("isBound") ?? false }; @@ -717,6 +717,10 @@ private void LoadNaturalClass(XElement natClassElem) _language.NaturalClasses.Add(nc); _natClasses[(string)natClassElem.Attribute("id")] = nc; + foreach (CharacterDefinitionTable table in _language.CharacterDefinitionTables) + { + table.AddNaturalClass(nc); + } } private void LoadPhonologicalRule(XElement pruleElem) diff --git a/tests/SIL.Machine.Morphology.HermitCrab.Tests/HermitCrabTestBase.cs b/tests/SIL.Machine.Morphology.HermitCrab.Tests/HermitCrabTestBase.cs index f307253f8..d6b56ed60 100644 --- a/tests/SIL.Machine.Morphology.HermitCrab.Tests/HermitCrabTestBase.cs +++ b/tests/SIL.Machine.Morphology.HermitCrab.Tests/HermitCrabTestBase.cs @@ -815,7 +815,7 @@ public void TestCleanup() } } - private LexEntry AddEntry(string gloss, FeatureStruct syntacticFS, Stratum stratum, params string[] forms) + public LexEntry AddEntry(string gloss, FeatureStruct syntacticFS, Stratum stratum, params string[] forms) { var entry = new LexEntry { @@ -825,7 +825,7 @@ private LexEntry AddEntry(string gloss, FeatureStruct syntacticFS, Stratum strat IsPartial = syntacticFS.IsEmpty }; foreach (string form in forms) - entry.Allomorphs.Add(new RootAllomorph(new Segments(stratum.CharacterDefinitionTable, form))); + entry.Allomorphs.Add(new RootAllomorph(new Segments(stratum.CharacterDefinitionTable, form, true))); stratum.Entries.Add(entry); Entries[gloss] = entry; return entry; diff --git a/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs b/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs index 477096776..a01ec12c0 100644 --- a/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs +++ b/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs @@ -64,6 +64,41 @@ public void AnalyzeWord_CannotAnalyze_ReturnsEmptyEnumerable() Assert.That(morpher.AnalyzeWord("sagt"), Is.Empty); } + [Test] + public void AnalyzeWord_CanGuess_ReturnsCorrectAnalysis() + { + var any = 
FeatureStruct.New().Symbol(HCFeatureSystem.Segment).Value; + + var edSuffix = new AffixProcessRule + { + Id = "PAST", + Name = "ed_suffix", + Gloss = "PAST", + RequiredSyntacticFeatureStruct = FeatureStruct.New(Language.SyntacticFeatureSystem).Symbol("V").Value + }; + edSuffix.Allomorphs.Add( + new AffixProcessAllomorph + { + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+d") } + } + ); + Morphophonemic.MorphologicalRules.Add(edSuffix); + + var naturalClass = new NaturalClass(new FeatureStruct()) { Name = "Any" }; + Morphophonemic.CharacterDefinitionTable.AddNaturalClass(naturalClass); + AddEntry("pattern", new FeatureStruct(), Morphophonemic, "[Any]*"); + + var morpher = new Morpher(TraceManager, Language); + + Assert.That(morpher.AnalyzeWord("gag"), Is.Empty); + Assert.That(morpher.AnalyzeWord("gagd"), Is.Empty); + var analyses = morpher.AnalyzeWord("gag", true).ToList(); + Assert.That(analyses[0].ToString(), Is.EqualTo("[*gag]")); + var analyses2 = morpher.AnalyzeWord("gagd", true).ToList(); + Assert.That(analyses2[0].ToString(), Is.EqualTo("[*gag ed_suffix]")); + } + [Test] public void GenerateWords_CanGenerate_ReturnsCorrectWord() { @@ -135,4 +170,114 @@ public void GenerateWords_CannotGenerate_ReturnsEmptyEnumerable() var analysis = new WordAnalysis(new IMorpheme[] { Entries["32"], edSuffix }, 0, "V"); Assert.That(morpher.GenerateWords(analysis), Is.Empty); } + + [Test] + public void TestMatchNodesWithPattern() + { + Morpher morpher = new Morpher(TraceManager, Language); + Feature feat1 = new StringFeature("1"); + Feature feat2 = new StringFeature("2"); + FeatureValue valueA = new StringFeatureValue("A"); + FeatureValue valueB = new StringFeatureValue("B"); + FeatureStruct fs1A = new FeatureStruct(); + FeatureStruct fs2B = new FeatureStruct(); + fs1A.AddValue(feat1, valueA); + fs2B.AddValue(feat2, valueB); + + // Test feature matching. 
+ List nodesfs1A = new List { new ShapeNode(fs1A) }; + List nodesfs2B = new List { new ShapeNode(fs2B) }; + var fs1A2B = morpher.MatchNodesWithPattern(nodesfs1A, nodesfs2B); + Assert.That( + fs1A2B.ToList()[0][0].Annotation.FeatureStruct.GetValue(feat1).ToString(), + Is.EqualTo(valueA.ToString()) + ); + Assert.That( + fs1A2B.ToList()[0][0].Annotation.FeatureStruct.GetValue(feat2).ToString(), + Is.EqualTo(valueB.ToString()) + ); + + IList noNodes = GetNodes(""); + IList oneNode = GetNodes("a"); + IList twoNodes = GetNodes("aa"); + IList threeNodes = GetNodes("aaa"); + IList fourNodes = GetNodes("aaaa"); + var naturalClass = new NaturalClass(new FeatureStruct()) { Name = "Any" }; + Table2.AddNaturalClass(naturalClass); + + // Test sequences. + Assert.That(morpher.MatchNodesWithPattern(oneNode, GetNodes("i")), Is.Empty); + Assert.That( + morpher.MatchNodesWithPattern(oneNode, oneNode), + Is.EqualTo(new List> { oneNode }) + ); + Assert.That( + morpher.MatchNodesWithPattern(twoNodes, twoNodes), + Is.EquivalentTo(new List> { twoNodes }) + ); + Assert.That( + morpher.MatchNodesWithPattern(threeNodes, threeNodes), + Is.EquivalentTo(new List> { threeNodes }) + ); + + // Test optionality. + IList optionalPattern = GetNodes("([Any])"); + Assert.That( + morpher.MatchNodesWithPattern(noNodes, optionalPattern), + Is.EquivalentTo(new List> { noNodes }) + ); + Assert.That( + morpher.MatchNodesWithPattern(oneNode, optionalPattern), + Is.EquivalentTo(new List> { oneNode }) + ); + Assert.That(morpher.MatchNodesWithPattern(twoNodes, optionalPattern), Is.Empty); + + // Test ambiguity. + // (It is up to the caller to eliminate duplicates.) 
+ IList optionalPattern2 = GetNodes("([Any])([Any])"); + Assert.That( + morpher.MatchNodesWithPattern(noNodes, optionalPattern2), + Is.EquivalentTo(new List> { noNodes }) + ); + Assert.That( + morpher.MatchNodesWithPattern(oneNode, optionalPattern2), + Is.EquivalentTo(new List> { oneNode, oneNode }) + ); + Assert.That( + morpher.MatchNodesWithPattern(twoNodes, optionalPattern2), + Is.EquivalentTo(new List> { twoNodes }) + ); + Assert.That(morpher.MatchNodesWithPattern(threeNodes, optionalPattern2), Is.Empty); + + // Test Kleene star. + IList starPattern = GetNodes("[Any]*"); + Assert.That( + morpher.MatchNodesWithPattern(noNodes, starPattern), + Is.EquivalentTo(new List> { noNodes }) + ); + Assert.That( + morpher.MatchNodesWithPattern(oneNode, starPattern), + Is.EquivalentTo(new List> { oneNode }) + ); + Assert.That( + morpher.MatchNodesWithPattern(twoNodes, starPattern), + Is.EquivalentTo(new List> { twoNodes }) + ); + + // Test Kleene plus look alike ("+" is a boundary marker). + IList plusPattern = GetNodes("[Any]+"); + Assert.That(morpher.MatchNodesWithPattern(noNodes, plusPattern), Is.Empty); + Assert.That( + morpher.MatchNodesWithPattern(oneNode, plusPattern), + Is.EquivalentTo(new List> { oneNode }) + ); + Assert.That(morpher.MatchNodesWithPattern(twoNodes, plusPattern), Is.Empty); + } + + IList GetNodes(string pattern) + { + // Use Table2 because it has boundaries defined. + Shape shape = new Segments(Table2, pattern, true).Shape; + return shape.GetNodes(shape.Range).ToList(); + } }