Skip to content

Commit

Permalink
Merge branch 'master' into usfm-tag-prop-fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
johnml1135 authored Oct 24, 2024
2 parents b8fdb04 + cc7c4ea commit f7fd79c
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,13 @@ public Word ApplyRhs(PatternRule<Word, ShapeNode> rule, Match<Word, ShapeNode> m
_allomorph,
output.MorphologicalRuleApplicationCount.ToString()
);

if (outputNewMorph == null)
{
// There are no new output morphs in a truncation rule,
// so we mark its allomorph on the last node of the output shape.
string morphID = output.MorphologicalRuleApplicationCount.ToString();
output.MarkMorph(new List<ShapeNode>() { output.Shape.Last }, _allomorph, morphID);
}
var markedAllomorphs = new HashSet<Allomorph>();
foreach (Annotation<ShapeNode> inputMorph in match.Input.Morphs)
{
Expand All @@ -178,11 +184,24 @@ public Word ApplyRhs(PatternRule<Word, ShapeNode> rule, Match<Word, ShapeNode> m
}
else if (inputMorph.Parent == null && !markedAllomorphs.Contains(allomorph))
{
// an existing morph has been completely subsumed by the new morph
// mark the subsumed morph so we don't lose track of it
// this is only necessary if the allomorph hasn't already been marked elsewhere
Annotation<ShapeNode> outputMorph = output.MarkSubsumedMorph(outputNewMorph, allomorph, morphID);
MarkSubsumedMorphs(match.Input, output, inputMorph, outputMorph);
// This is only necessary if the allomorph hasn't already been marked elsewhere.
if (outputNewMorph == null)
{
// There are no new output morphs in a truncation rule,
// so we mark this allomorph on the first node of the output shape.
output.MarkMorph(new List<ShapeNode>() { output.Shape.First }, allomorph, morphID);
}
else
{
// an existing morph has been completely subsumed by the new morph
// mark the subsumed morph so we don't lose track of it
Annotation<ShapeNode> outputMorph = output.MarkSubsumedMorph(
outputNewMorph,
allomorph,
morphID
);
MarkSubsumedMorphs(match.Input, output, inputMorph, outputMorph);
}
}
markedAllomorphs.Add(allomorph);
}
Expand Down
10 changes: 2 additions & 8 deletions src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ public string UpdateUsfm(
string bookId,
IReadOnlyList<(IReadOnlyList<ScriptureRef>, string)> rows,
string fullName = null,
bool stripAllText = false,
bool preferExistingText = true
UpdateUsfmBehavior behavior = UpdateUsfmBehavior.PreferExisting
)
{
string fileName = _settings.GetBookFileName(bookId);
Expand All @@ -37,12 +36,7 @@ public string UpdateUsfm(
usfm = reader.ReadToEnd();
}

var handler = new UpdateUsfmParserHandler(
rows,
fullName is null ? null : $"- {fullName}",
stripAllText,
preferExistingText: preferExistingText
);
var handler = new UpdateUsfmParserHandler(rows, fullName is null ? null : $"- {fullName}", behavior);
try
{
UsfmParser.Parse(usfm, handler, _settings.Stylesheet, _settings.Versification);
Expand Down
21 changes: 14 additions & 7 deletions src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@

namespace SIL.Machine.Corpora
{
/// <summary>
/// Selects how <c>UpdateUsfmParserHandler</c> chooses between the text already present in the USFM
/// and the new text supplied in its rows when deciding whether to emit the new tokens.
/// </summary>
public enum UpdateUsfmBehavior
{
/// <summary>
/// Keep existing text where there is any; the new tokens are used only when there is new text
/// and no existing text.
/// </summary>
PreferExisting,
/// <summary>
/// Use the new tokens whenever there is new text, even if existing text is present.
/// </summary>
PreferNew,
/// <summary>
/// Always use the new tokens, whether or not there is new text, so existing text is not kept.
/// </summary>
StripExisting
}

/***
* This is a USFM parser handler that can be used to replace the existing text in a USFM file with the specified
* text.
Expand All @@ -14,26 +21,23 @@ public class UpdateUsfmParserHandler : ScriptureRefUsfmParserHandlerBase
private readonly List<UsfmToken> _tokens;
private readonly List<UsfmToken> _newTokens;
private readonly string _idText;
private readonly bool _stripAllText;
private readonly bool _preferExistingText;
private readonly UpdateUsfmBehavior _behavior;
private readonly Stack<bool> _replace;
private int _rowIndex;
private int _tokenIndex;

public UpdateUsfmParserHandler(
IReadOnlyList<(IReadOnlyList<ScriptureRef>, string)> rows = null,
string idText = null,
bool stripAllText = false,
bool preferExistingText = false
UpdateUsfmBehavior behavior = UpdateUsfmBehavior.PreferExisting
)
{
_rows = rows ?? Array.Empty<(IReadOnlyList<ScriptureRef>, string)>();
_tokens = new List<UsfmToken>();
_newTokens = new List<UsfmToken>();
_idText = idText;
_stripAllText = stripAllText;
_replace = new Stack<bool>();
_preferExistingText = preferExistingText;
_behavior = behavior;
}

public IReadOnlyList<UsfmToken> Tokens => _tokens;
Expand Down Expand Up @@ -371,7 +375,10 @@ private bool ReplaceWithNewTokens(UsfmParserState state)
break;
}
}
bool useNewTokens = _stripAllText || (newText && !existingText) || (newText && !_preferExistingText);
bool useNewTokens =
_behavior == UpdateUsfmBehavior.StripExisting
|| (newText && !existingText)
|| (newText && _behavior == UpdateUsfmBehavior.PreferNew);

if (useNewTokens)
_tokens.AddRange(_newTokens);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1192,7 +1192,7 @@ public void TruncateRules()
Morphophonemic.MorphologicalRules.Add(truncate);

var morpher = new Morpher(TraceManager, Language);
AssertMorphsEqual(morpher.ParseWord("sa"), "32");
AssertMorphsEqual(morpher.ParseWord("sa"), "32 3SG");

truncate.Allomorphs.Clear();
truncate.Allomorphs.Add(
Expand All @@ -1208,7 +1208,7 @@ public void TruncateRules()
);

morpher = new Morpher(TraceManager, Language);
AssertMorphsEqual(morpher.ParseWord("ag"), "32");
AssertMorphsEqual(morpher.ParseWord("ag"), "32 3SG");

truncate.Allomorphs.Clear();
truncate.Allomorphs.Add(
Expand All @@ -1224,7 +1224,7 @@ public void TruncateRules()
);

morpher = new Morpher(TraceManager, Language);
AssertMorphsEqual(morpher.ParseWord("ag"), "32");
AssertMorphsEqual(morpher.ParseWord("ag"), "32 3SG");

truncate.Allomorphs.Clear();
truncate.Allomorphs.Add(
Expand All @@ -1240,7 +1240,7 @@ public void TruncateRules()
);

morpher = new Morpher(TraceManager, Language);
AssertMorphsEqual(morpher.ParseWord("sa"), "32");
AssertMorphsEqual(morpher.ParseWord("sa"), "32 3SG");

truncate.Allomorphs.Clear();
truncate.Allomorphs.Add(
Expand Down Expand Up @@ -1866,10 +1866,32 @@ public void SubsumedAffix()
);
Morphophonemic.MorphologicalRules.Add(nominalizer);

var deleteVowelSuffix = new AffixProcessRule
{
Name = "delete_vowel_suffix",
Gloss = "PRES",
RequiredSyntacticFeatureStruct = FeatureStruct.New(Language.SyntacticFeatureSystem).Symbol("V").Value,
};
deleteVowelSuffix.Allomorphs.Add(
new AffixProcessAllomorph
{
Lhs =
{
Pattern<Word, ShapeNode>.New("1").Annotation(any).OneOrMore.Value,
Pattern<Word, ShapeNode>.New("2").Annotation(vowel).Value
},
Rhs = { new CopyFromInput("1") }
}
);
Morphophonemic.MorphologicalRules.Add(deleteVowelSuffix);

var morpher = new Morpher(TraceManager, Language);
AssertMorphsEqual(morpher.ParseWord("tagu"), "47 3SG");
AssertMorphsEqual(morpher.ParseWord("tags"), "47 3SG PAST");
AssertMorphsEqual(morpher.ParseWord("tagsv"), "47 3SG PAST NOM");
// Test deleteVowelSuffix.
AssertMorphsEqual(morpher.ParseWord("tag"), "47 3SG PRES", "47");
AssertMorphsEqual(morpher.ParseWord("bubib"), "42 PRES", "43 PRES");
}

[Test]
Expand Down
24 changes: 6 additions & 18 deletions tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public void GetUsfm_IdText()
[Test]
public void GetUsfm_StripAllText()
{
string target = UpdateUsfm(stripAllText: true);
string target = UpdateUsfm(behavior: UpdateUsfmBehavior.StripExisting);
Assert.That(target, Contains.Substring("\\id MAT\r\n"));
Assert.That(target, Contains.Substring("\\v 1\r\n"));
Assert.That(target, Contains.Substring("\\s\r\n"));
Expand All @@ -43,7 +43,7 @@ public void GetUsfm_PreferExisting()
(ScrRef("MAT 1:6"), "Text 6"),
(ScrRef("MAT 1:7"), "Text 7"),
};
string target = UpdateUsfm(rows, preferExistingText: true);
string target = UpdateUsfm(rows, behavior: UpdateUsfmBehavior.PreferExisting);
Assert.That(target, Contains.Substring("\\id MAT - Test\r\n"));
Assert.That(target, Contains.Substring("\\v 6 Verse 6 content.\r\n"));
Assert.That(target, Contains.Substring("\\v 7 Text 7\r\n"));
Expand All @@ -57,7 +57,7 @@ public void GetUsfm_PreferRows()
(ScrRef("MAT 1:6"), "Text 6"),
(ScrRef("MAT 1:7"), "Text 7"),
};
string target = UpdateUsfm(rows, preferExistingText: false);
string target = UpdateUsfm(rows, behavior: UpdateUsfmBehavior.PreferNew);
Assert.That(target, Contains.Substring("\\id MAT - Test\r\n"));
Assert.That(target, Contains.Substring("\\v 6 Text 6\r\n"));
Assert.That(target, Contains.Substring("\\v 7 Text 7\r\n"));
Expand Down Expand Up @@ -438,30 +438,18 @@ private static string UpdateUsfm(
IReadOnlyList<(IReadOnlyList<ScriptureRef>, string)>? rows = null,
string? source = null,
string? idText = null,
bool stripAllText = false,
bool preferExistingText = false
UpdateUsfmBehavior behavior = UpdateUsfmBehavior.PreferNew
)
{
if (source is null)
{
var updater = new FileParatextProjectTextUpdater(CorporaTestHelpers.UsfmTestProjectPath);
return updater.UpdateUsfm(
"MAT",
rows,
fullName: idText,
stripAllText: stripAllText,
preferExistingText: preferExistingText
);
return updater.UpdateUsfm("MAT", rows, idText, behavior);
}
else
{
source = source.Trim().ReplaceLineEndings("\r\n") + "\r\n";
var updater = new UpdateUsfmParserHandler(
rows,
idText,
stripAllText: stripAllText,
preferExistingText: preferExistingText
);
var updater = new UpdateUsfmParserHandler(rows, idText, behavior);
UsfmParser.Parse(source, updater);
return updater.GetUsfm();
}
Expand Down
5 changes: 2 additions & 3 deletions tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ string sfmFileName in Directory
string bookId;
if (!targetSettings.IsBookFileName(sfmFileName, out bookId))
continue;
string newUsfm = updater.UpdateUsfm(bookId, pretranslations, stripAllText: true, preferExistingText: false);
string newUsfm = updater.UpdateUsfm(bookId, pretranslations, behavior: UpdateUsfmBehavior.StripExisting);
Assert.That(newUsfm, Is.Not.Null);
}
}
Expand Down Expand Up @@ -150,8 +150,7 @@ async Task GetUsfmAsync(string projectPath)
string newUsfm = updater.UpdateUsfm(
bookId,
pretranslations,
stripAllText: true,
preferExistingText: true
behavior: UpdateUsfmBehavior.StripExisting
);
Assert.That(newUsfm, Is.Not.Null);
}
Expand Down

0 comments on commit f7fd79c

Please sign in to comment.