Skip to content

Commit

Permalink
Address Damien's concerns
Browse files Browse the repository at this point in the history
  • Loading branch information
jtmaxwell3 committed Sep 27, 2024
1 parent 2800d5c commit fcb2c65
Show file tree
Hide file tree
Showing 9 changed files with 206 additions and 124 deletions.
111 changes: 60 additions & 51 deletions src/SIL.Machine.Morphology.HermitCrab/CharacterDefinitionTable.cs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ public IEnumerable<string> GetMatchingStrReps(ShapeNode node)
}
}

private bool GetShapeNodes(string str, out IEnumerable<ShapeNode> nodes, out int errorPos)
private bool GetShapeNodes(string str, bool allowPattern, out IEnumerable<ShapeNode> nodes, out int errorPos)
{
var nodesList = new List<ShapeNode>();
int i = 0;
Expand All @@ -132,66 +132,70 @@ private bool GetShapeNodes(string str, out IEnumerable<ShapeNode> nodes, out int
}
if (match)
continue;

// Check for pattern language.
// NB: This only happens when the characters don't match.
if (normalized[i] == '[')
if (allowPattern)
{
// Example: [Seg].
// Look for a natural class.
int closePos = normalized.IndexOf("]", i);
if (closePos > 0)
// Check for pattern language.
// NB: This only happens when the characters don't match.
// I thought about implementing this using Pattern<Shape, ShapeNode>,
// but the Matcher doesn't preserve the unifications of the nodes.
if (normalized[i] == '[')
{
string className = normalized.Substring(i + 1, closePos - i - 1);
if (_naturalClassLookup.ContainsKey(className))
// Example: [Seg].
// Look for a natural class.
int closePos = normalized.IndexOf("]", i);
if (closePos > 0)
{
NaturalClass naturalClass = _naturalClassLookup[className];
var node = new ShapeNode(naturalClass.FeatureStruct);
nodesList.Add(node);
i = closePos + 1;
continue;
string className = normalized.Substring(i + 1, closePos - i - 1);
if (_naturalClassLookup.ContainsKey(className))
{
NaturalClass naturalClass = _naturalClassLookup[className];
var node = new ShapeNode(naturalClass.FeatureStruct);
nodesList.Add(node);
i = closePos + 1;
continue;
}
}
}
}
else if (normalized[i] == '(')
{
if (i + 1 < normalized.Length && normalized[i + 1] == '[')
else if (normalized[i] == '(')
{
// The natural class that follows is optional.
// Wait for the close parenthesis to process.
optional = true;
optionalPos = i;
optionalCount = nodesList.Count;
i++;
continue;
if (i + 1 < normalized.Length && normalized[i + 1] == '[')
{
// The natural class that follows is optional.
// Wait for the close parenthesis to process.
optional = true;
optionalPos = i;
optionalCount = nodesList.Count;
i++;
continue;
}
}
}
else if (normalized[i] == ')')
{
if (optional && nodesList.Count == optionalCount + 1)
else if (normalized[i] == ')')
{
// Example: ([Seg]).
// Ill-formed: ([C][V]).
// Make the last node optional.
nodesList[nodesList.Count - 1].Annotation.Optional = true;
optional = false;
i++;
continue;
if (optional && nodesList.Count == optionalCount + 1)
{
// Example: ([Seg]).
// Ill-formed: ([C][V]).
// Make the last node optional.
nodesList[nodesList.Count - 1].Annotation.Optional = true;
optional = false;
i++;
continue;
}
}
}
else if (normalized[i] == '*')
{
if (i > 0 && normalized[i - 1] == ']')
else if (normalized[i] == '*')
{
// Example: [Seg]*.
// Make the last node Kleene star.
nodesList[nodesList.Count - 1].Annotation.Optional = true;
nodesList[nodesList.Count - 1].Annotation.Iterative = true;
i++;
continue;
if (i > 0 && normalized[i - 1] == ']')
{
// Example: [Seg]*.
// Make the last node Kleene star.
nodesList[nodesList.Count - 1].Annotation.Optional = true;
nodesList[nodesList.Count - 1].Iterative = true;
i++;
continue;
}
}
// Kleene plus doesn't work because '+' is a boundary marker.
}
// Kleene plus doesn't work because '+' is a boundary marker.

// Failure
nodes = null;
Expand All @@ -215,10 +219,15 @@ private bool GetShapeNodes(string str, out IEnumerable<ShapeNode> nodes, out int
}

/// <summary>
/// Segments <paramref name="str"/> with pattern syntax disabled.
/// </summary>
/// <param name="str">The string to segment.</param>
/// <returns>
/// Whatever the two-argument <c>Segment</c> overload returns when it is called
/// with <c>allowPattern</c> set to <c>false</c>.
/// </returns>
public Shape Segment(string str)
{
    // Plain segmentation: the pattern notation is not interpreted on this path.
    const bool allowPattern = false;
    return Segment(str, allowPattern);
}

public Shape Segment(string str, bool allowPattern)
{
IEnumerable<ShapeNode> nodes;
int errorPos;
if (GetShapeNodes(str, out nodes, out errorPos))
if (GetShapeNodes(str, allowPattern, out nodes, out errorPos))
{
var shape = new Shape(begin => new ShapeNode(
begin ? HCFeatureSystem.LeftSideAnchor : HCFeatureSystem.RightSideAnchor
Expand All @@ -241,7 +250,7 @@ public int TrySegment(string str, out Shape shape)
{
IEnumerable<ShapeNode> nodes;
int errorPos;
if (GetShapeNodes(str, out nodes, out errorPos))
if (GetShapeNodes(str, true, out nodes, out errorPos))
{
shape = new Shape(begin => new ShapeNode(
begin ? HCFeatureSystem.LeftSideAnchor : HCFeatureSystem.RightSideAnchor
Expand Down
135 changes: 105 additions & 30 deletions src/SIL.Machine.Morphology.HermitCrab/Morpher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#if !SINGLE_THREADED
using System.Collections.Concurrent;
using System.Threading.Tasks;
using System.Text;
#endif

namespace SIL.Machine.Morphology.HermitCrab
Expand Down Expand Up @@ -371,36 +372,58 @@ private IEnumerable<Word> LexicalGuess(Word input)
);
foreach (List<ShapeNode> match in MatchNodesWithPattern(shapeNodes.ToList(), shapePattern.ToList()))
{
// Create a root allomorph for the guess.
string shapeString = match.ToString(table, false);
if (shapeSet.Contains(shapeString))
// Avoid duplicates caused by multiple paths through pattern (e.g. ([Seg])([Seg])).
continue;
shapeSet.Add(shapeString);
var root = new RootAllomorph(new Segments(table, shapeString)) { Guessed = true };
// Create a lexical entry to hold the root allomorph.
// (The root's Morpheme will point to the lexical entry.)
var lexEntry = new LexEntry
IEnumerable<string> shapeStrings = new List<string>() { match.ToString(table, false) };
// We could set shapeStrings to GetShapeStrings(match, table),
// but that produces spurious ambiguities that don't seem to have any value.
foreach (string shapeString in shapeStrings)
{
Id = shapeString,
SyntacticFeatureStruct = input.SyntacticFeatureStruct,
Gloss = shapeString,
Stratum = input.Stratum,
IsPartial = input.SyntacticFeatureStruct.IsEmpty
};
lexEntry.Allomorphs.Add(root);
// Point the root allomorph to the lexical pattern in FieldWorks.
if (lexicalPattern.Properties.ContainsKey("ID"))
root.Properties["ID"] = lexicalPattern.Properties["ID"];
if (lexicalPattern.Morpheme != null && lexicalPattern.Morpheme.Properties.ContainsKey("ID"))
root.Morpheme.Properties["ID"] = lexicalPattern.Morpheme.Properties["ID"];
// Create a new word that uses the root allomorph.
Word newWord = input.Clone();
newWord.RootAllomorph = root;
if (_traceManager.IsTracing)
_traceManager.SynthesizeWord(_lang, newWord);
newWord.Freeze();
yield return newWord;
if (shapeSet.Contains(shapeString))
// Avoid duplicates caused by multiple paths through pattern (e.g. ([Seg])([Seg])).
continue;
shapeSet.Add(shapeString);
// Create a root allomorph for the guess.
var root = new RootAllomorph(new Segments(table, shapeString)) { Guessed = true };
root.AllomorphCoOccurrenceRules.AddRange(lexicalPattern.AllomorphCoOccurrenceRules);
root.Environments.AddRange(lexicalPattern.Environments);
root.Properties.AddRange(lexicalPattern.Properties);
root.StemName = lexicalPattern.StemName;
root.IsBound = lexicalPattern.IsBound;
// Create a lexical entry to hold the root allomorph.
// (The root's Morpheme will point to the lexical entry.)
var lexEntry = new LexEntry
{
Id = shapeString,
Gloss = shapeString,
IsPartial = input.SyntacticFeatureStruct.IsEmpty,
SyntacticFeatureStruct = input.SyntacticFeatureStruct,
Stratum = input.Stratum,
};
lexEntry.Allomorphs.Add(root);
// Point the root allomorph to the lexical pattern in FieldWorks.
if (lexicalPattern.Morpheme != null)
{
    // Copy Morpheme fields.
    Morpheme morpheme = lexicalPattern.Morpheme;
    lexEntry.MorphemeCoOccurrenceRules.AddRange(morpheme.MorphemeCoOccurrenceRules);
    lexEntry.Properties.AddRange(morpheme.Properties);
    lexEntry.Stratum = morpheme.Stratum;
    // Use a safe cast: a direct (LexEntry) cast would throw InvalidCastException
    // for a morpheme that is not a LexEntry, and would make the null check below
    // meaningless because morpheme is already known to be non-null here.
    LexEntry patternEntry = morpheme as LexEntry;
    if (patternEntry != null)
    {
        // Copy LexEntry fields.
        lexEntry.MprFeatures = patternEntry.MprFeatures;
        lexEntry.SyntacticFeatureStruct = patternEntry.SyntacticFeatureStruct;
        lexEntry.IsPartial = patternEntry.IsPartial;
    }
}
// Create a new word that uses the root allomorph.
Word newWord = input.Clone();
newWord.RootAllomorph = root;
if (_traceManager.IsTracing)
_traceManager.SynthesizeWord(_lang, newWord);
newWord.Freeze();
yield return newWord;
}
}
}
}
Expand Down Expand Up @@ -443,7 +466,7 @@ public IEnumerable<List<ShapeNode>> MatchNodesWithPattern(
return results;
// Make a copy of prefix to avoid crosstalk and add newNode.
prefix = new List<ShapeNode>(prefix) { newNode };
if (pattern[p].Annotation.Iterative)
if (pattern[p].Iterative)
// Try using this item in the pattern again.
results.AddRange(MatchNodesWithPattern(nodes, pattern, n + 1, p, true, prefix));
// Try the remainder of the nodes against the remainder of the pattern.
Expand All @@ -462,6 +485,58 @@ ShapeNode UnifyShapeNodes(ShapeNode node, ShapeNode pattern)
return new ShapeNode(fs);
}

/// <summary>
/// Enumerates every string that the given node sequence can be spelled as, using the
/// string representations that <paramref name="table"/> reports for each node.
/// </summary>
/// <remarks>
/// Boundary nodes, deleted nodes, and nodes for which the table reports no string
/// representation contribute nothing to the result. When several representations of a
/// node have value-equal feature structures, only the first one encountered is kept.
/// The result is the cross-product of the remaining choices, so it can grow quickly
/// when many nodes are ambiguous. The <paramref name="nodes"/> list is only read;
/// it is never modified.
/// </remarks>
/// <param name="nodes">The nodes to spell out, in order.</param>
/// <param name="table">The character definition table used to look up string representations.</param>
/// <returns>
/// All spellings of the node sequence; a single empty string when no node contributes.
/// </returns>
private IEnumerable<string> GetShapeStrings(IList<ShapeNode> nodes, CharacterDefinitionTable table)
{
    // Start from the empty suffix and extend it leftwards, one node at a time.
    // Walking backwards by index (instead of popping the head of the list) leaves the
    // caller's list untouched, works for fixed-size lists, and avoids repeated removals.
    IList<string> suffixes = new List<string> { "" };
    for (int i = nodes.Count - 1; i >= 0; i--)
    {
        ShapeNode node = nodes[i];
        if ((node.Annotation.Type() == HCFeatureSystem.Boundary) || node.IsDeleted())
        {
            // Skip this node.
            continue;
        }

        // Materialize once so the sequence returned by the table is not enumerated twice.
        List<string> strReps = table.GetMatchingStrReps(node).ToList();
        if (strReps.Count == 0)
        {
            // Skip this node.
            continue;
        }

        // Get string reps with unique feature structures.
        var uniqueStrReps = new List<string>();
        foreach (string strRep in strReps)
        {
            CharacterDefinition cd = table[strRep];
            bool found = uniqueStrReps.Any(
                uniqueStrRep => table[uniqueStrRep].FeatureStruct.ValueEquals(cd.FeatureStruct)
            );
            if (!found)
            {
                uniqueStrReps.Add(strRep);
            }
        }

        // Take the cross-product of uniqueStrReps and suffixes.
        var strings = new List<string>();
        foreach (string uniqueStrRep in uniqueStrReps)
        {
            foreach (string suffix in suffixes)
            {
                strings.Add(uniqueStrRep + suffix);
            }
        }
        suffixes = strings;
    }
    return suffixes;
}

private bool IsWordValid(Word word)
{
if (
Expand Down
30 changes: 15 additions & 15 deletions src/SIL.Machine.Morphology.HermitCrab/RootAllomorph.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.Linq;
using SIL.Machine.Annotations;
using System.Linq;

namespace SIL.Machine.Morphology.HermitCrab
{
Expand All @@ -9,12 +10,24 @@ public class RootAllomorph : Allomorph
{
private readonly Segments _segments;


/// <summary>
/// Creates a root allomorph over the given segments and records, once, whether
/// the shape of those segments is a pattern.
/// </summary>
/// <param name="segments">The segments that make up this allomorph.</param>
public RootAllomorph(Segments segments)
{
    _segments = segments;

    // The shape counts as a pattern when at least one node is iterative,
    // or is optional without being a boundary.
    var shape = _segments.Shape;
    IsPattern = shape
        .GetNodes(shape.Range)
        .Any(node =>
            node.Iterative
            || (node.Annotation.Optional && node.Annotation.Type() != HCFeatureSystem.Boundary)
        );
}

/// <summary>
Expand All @@ -34,20 +47,7 @@ public Segments Segments
/// </summary>
public bool IsPattern
{
get
{
foreach (var node in _segments.Shape.GetNodes(_segments.Shape.Range))
{
if (
node.Annotation.Iterative
|| (node.Annotation.Optional && node.Annotation.Type() != HCFeatureSystem.Boundary)
)
{
return true;
}
}
return false;
}
get; private set;
}

protected override bool ConstraintsEqual(Allomorph other)
Expand Down
3 changes: 3 additions & 0 deletions src/SIL.Machine.Morphology.HermitCrab/Segments.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ public class Segments
/// <summary>
/// Creates segments for <paramref name="representation"/>, using the shape that
/// <c>table.Segment(representation)</c> returns.
/// </summary>
/// <param name="table">The character definition table used to segment the representation.</param>
/// <param name="representation">The string representation to segment.</param>
public Segments(CharacterDefinitionTable table, string representation)
: this(table, representation, table.Segment(representation)) { }

/// <summary>
/// Creates segments for <paramref name="representation"/>, using the shape that
/// <c>table.Segment(representation, allowPattern)</c> returns.
/// </summary>
/// <param name="table">The character definition table used to segment the representation.</param>
/// <param name="representation">The string representation to segment.</param>
/// <param name="allowPattern">
/// Passed through unchanged to <c>table.Segment</c>; presumably controls whether
/// pattern notation in the representation is interpreted (confirm against the table).
/// </param>
public Segments(CharacterDefinitionTable table, string representation, bool allowPattern)
: this(table, representation, table.Segment(representation, allowPattern)) { }

public Segments(CharacterDefinitionTable table, string representation, Shape shape)
{
_representation = representation;
Expand Down
2 changes: 1 addition & 1 deletion src/SIL.Machine.Morphology.HermitCrab/XmlLanguageLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ private bool TryLoadLexEntry(XElement entryElem, CharacterDefinitionTable table,
private RootAllomorph LoadRootAllomorph(XElement alloElem, CharacterDefinitionTable table)
{
var shapeStr = (string)alloElem.Element("PhoneticShape");
Segments segments = new Segments(table, shapeStr);
Segments segments = new Segments(table, shapeStr, true);
if (segments.Shape.All(n => n.Type() == HCFeatureSystem.Boundary))
throw new InvalidShapeException(shapeStr, 0);
var allomorph = new RootAllomorph(segments) { IsBound = (bool?)alloElem.Attribute("isBound") ?? false };
Expand Down
Loading

0 comments on commit fcb2c65

Please sign in to comment.