From dfbb754bf2c37637923b20a06254f12f0dbc7b5b Mon Sep 17 00:00:00 2001 From: John T Maxwell III Date: Wed, 23 Oct 2024 07:51:31 -0700 Subject: [PATCH 1/2] Fix LT-21939 (#264) * Fix LT-21939 * Fix format * Fix format --- .../SynthesisAffixProcessAllomorphRuleSpec.cs | 31 +++++++++++++++---- .../AffixProcessRuleTests.cs | 30 +++++++++++++++--- 2 files changed, 51 insertions(+), 10 deletions(-) diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisAffixProcessAllomorphRuleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisAffixProcessAllomorphRuleSpec.cs index a1c80d66d..aab74102d 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisAffixProcessAllomorphRuleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisAffixProcessAllomorphRuleSpec.cs @@ -165,7 +165,13 @@ public Word ApplyRhs(PatternRule rule, Match m _allomorph, output.MorphologicalRuleApplicationCount.ToString() ); - + if (outputNewMorph == null) + { + // There are no new output morphs in a truncation rule, + // so we add its allomorph to the last output shape. + string morphID = output.MorphologicalRuleApplicationCount.ToString(); + output.MarkMorph(new List() { output.Shape.Last }, _allomorph, morphID); + } var markedAllomorphs = new HashSet(); foreach (Annotation inputMorph in match.Input.Morphs) { @@ -178,11 +184,24 @@ public Word ApplyRhs(PatternRule rule, Match m } else if (inputMorph.Parent == null && !markedAllomorphs.Contains(allomorph)) { - // an existing morph has been completely subsumed by the new morph - // mark the subsumed morph so we don't lose track of it - // this is only necessary if the allomorph hasn't already been marked elsewhere - Annotation outputMorph = output.MarkSubsumedMorph(outputNewMorph, allomorph, morphID); - MarkSubsumedMorphs(match.Input, output, inputMorph, outputMorph); + // This is only necessary if the allomorph hasn't already been marked elsewhere. + if (outputNewMorph == null) + { + // There are no new output morphs in a truncation rule, + // So we add allomorph to the first output shape. + output.MarkMorph(new List() { output.Shape.First }, allomorph, morphID); + } + else + { + // an existing morph has been completely subsumed by the new morph + // mark the subsumed morph so we don't lose track of it + Annotation outputMorph = output.MarkSubsumedMorph( + outputNewMorph, + allomorph, + morphID + ); + MarkSubsumedMorphs(match.Input, output, inputMorph, outputMorph); + } } markedAllomorphs.Add(allomorph); } diff --git a/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorphologicalRules/AffixProcessRuleTests.cs b/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorphologicalRules/AffixProcessRuleTests.cs index efc768277..d222c9476 100644 --- a/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorphologicalRules/AffixProcessRuleTests.cs +++ b/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorphologicalRules/AffixProcessRuleTests.cs @@ -1192,7 +1192,7 @@ public void TruncateRules() Morphophonemic.MorphologicalRules.Add(truncate); var morpher = new Morpher(TraceManager, Language); - AssertMorphsEqual(morpher.ParseWord("sa"), "32"); + AssertMorphsEqual(morpher.ParseWord("sa"), "32 3SG"); truncate.Allomorphs.Clear(); truncate.Allomorphs.Add( @@ -1208,7 +1208,7 @@ public void TruncateRules() ); morpher = new Morpher(TraceManager, Language); - AssertMorphsEqual(morpher.ParseWord("ag"), "32"); + AssertMorphsEqual(morpher.ParseWord("ag"), "32 3SG"); truncate.Allomorphs.Clear(); truncate.Allomorphs.Add( @@ -1224,7 +1224,7 @@ public void TruncateRules() ); morpher = new Morpher(TraceManager, Language); - AssertMorphsEqual(morpher.ParseWord("ag"), "32"); + AssertMorphsEqual(morpher.ParseWord("ag"), "32 3SG"); truncate.Allomorphs.Clear(); truncate.Allomorphs.Add( @@ -1240,7 +1240,7 @@ public void TruncateRules() ); morpher = new Morpher(TraceManager, Language); - AssertMorphsEqual(morpher.ParseWord("sa"), "32"); + AssertMorphsEqual(morpher.ParseWord("sa"), "32 3SG"); truncate.Allomorphs.Clear(); truncate.Allomorphs.Add( @@ -1866,10 +1866,32 @@ public void SubsumedAffix() ); Morphophonemic.MorphologicalRules.Add(nominalizer); + var deleteVowelSuffix = new AffixProcessRule + { + Name = "delete_vowel_suffix", + Gloss = "PRES", + RequiredSyntacticFeatureStruct = FeatureStruct.New(Language.SyntacticFeatureSystem).Symbol("V").Value, + }; + deleteVowelSuffix.Allomorphs.Add( + new AffixProcessAllomorph + { + Lhs = + { + Pattern.New("1").Annotation(any).OneOrMore.Value, + Pattern.New("2").Annotation(vowel).Value + }, + Rhs = { new CopyFromInput("1") } + } + ); + Morphophonemic.MorphologicalRules.Add(deleteVowelSuffix); + var morpher = new Morpher(TraceManager, Language); AssertMorphsEqual(morpher.ParseWord("tagu"), "47 3SG"); AssertMorphsEqual(morpher.ParseWord("tags"), "47 3SG PAST"); AssertMorphsEqual(morpher.ParseWord("tagsv"), "47 3SG PAST NOM"); + // Test deleteVowelSuffix. + AssertMorphsEqual(morpher.ParseWord("tag"), "47 3SG PRES", "47"); + AssertMorphsEqual(morpher.ParseWord("bubib"), "42 PRES", "43 PRES"); } [Test] From cc7c4eae80df9f2226036e6f32e3e369ca5ab45f Mon Sep 17 00:00:00 2001 From: Damien Daspit Date: Fri, 18 Oct 2024 17:54:00 -0500 Subject: [PATCH 2/2] Refactor stripAllText and preferExistingText into a single enum --- .../Corpora/ParatextProjectTextUpdaterBase.cs | 10 ++------ .../Corpora/UpdateUsfmParserHandler.cs | 21 ++++++++++------ .../Corpora/UpdateUsfmParserHandlerTests.cs | 24 +++++-------------- .../Corpora/UsfmManualTests.cs | 5 ++-- 4 files changed, 24 insertions(+), 36 deletions(-) diff --git a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs index 07c2ca6c0..ea86eb60d 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs @@ -23,8 +23,7 @@ public string UpdateUsfm( string bookId, IReadOnlyList<(IReadOnlyList, string)> rows, string fullName = null, - bool stripAllText = false, - bool preferExistingText = true + UpdateUsfmBehavior behavior = UpdateUsfmBehavior.PreferExisting ) { string fileName = _settings.GetBookFileName(bookId); @@ -37,12 +36,7 @@ public string UpdateUsfm( usfm = reader.ReadToEnd(); } - var handler = new UpdateUsfmParserHandler( - rows, - fullName is null ? null : $"- {fullName}", - stripAllText, - preferExistingText: preferExistingText - ); + var handler = new UpdateUsfmParserHandler(rows, fullName is null ? null : $"- {fullName}", behavior); try { UsfmParser.Parse(usfm, handler, _settings.Stylesheet, _settings.Versification); diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs index ce16c03d3..03c9bb12b 100644 --- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs +++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs @@ -4,6 +4,13 @@ namespace SIL.Machine.Corpora { + public enum UpdateUsfmBehavior + { + PreferExisting, + PreferNew, + StripExisting + } + /*** * This is a USFM parser handler that can be used to replace the existing text in a USFM file with the specified * text. @@ -14,8 +21,7 @@ public class UpdateUsfmParserHandler : ScriptureRefUsfmParserHandlerBase private readonly List _tokens; private readonly List _newTokens; private readonly string _idText; - private readonly bool _stripAllText; - private readonly bool _preferExistingText; + private readonly UpdateUsfmBehavior _behavior; private readonly Stack _replace; private int _rowIndex; private int _tokenIndex; @@ -23,17 +29,15 @@ public class UpdateUsfmParserHandler : ScriptureRefUsfmParserHandlerBase public UpdateUsfmParserHandler( IReadOnlyList<(IReadOnlyList, string)> rows = null, string idText = null, - bool stripAllText = false, - bool preferExistingText = false + UpdateUsfmBehavior behavior = UpdateUsfmBehavior.PreferExisting ) { _rows = rows ?? Array.Empty<(IReadOnlyList, string)>(); _tokens = new List(); _newTokens = new List(); _idText = idText; - _stripAllText = stripAllText; _replace = new Stack(); - _preferExistingText = preferExistingText; + _behavior = behavior; } public IReadOnlyList Tokens => _tokens; @@ -371,7 +375,10 @@ private bool ReplaceWithNewTokens(UsfmParserState state) break; } } - bool useNewTokens = _stripAllText || (newText && !existingText) || (newText && !_preferExistingText); + bool useNewTokens = + _behavior == UpdateUsfmBehavior.StripExisting + || (newText && !existingText) + || (newText && _behavior == UpdateUsfmBehavior.PreferNew); if (useNewTokens) _tokens.AddRange(_newTokens); diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs index 51cde1971..d27873d3b 100644 --- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs @@ -28,7 +28,7 @@ public void GetUsfm_IdText() [Test] public void GetUsfm_StripAllText() { - string target = UpdateUsfm(stripAllText: true); + string target = UpdateUsfm(behavior: UpdateUsfmBehavior.StripExisting); Assert.That(target, Contains.Substring("\\id MAT\r\n")); Assert.That(target, Contains.Substring("\\v 1\r\n")); Assert.That(target, Contains.Substring("\\s\r\n")); @@ -43,7 +43,7 @@ public void GetUsfm_PreferExisting() (ScrRef("MAT 1:6"), "Text 6"), (ScrRef("MAT 1:7"), "Text 7"), }; - string target = UpdateUsfm(rows, preferExistingText: true); + string target = UpdateUsfm(rows, behavior: UpdateUsfmBehavior.PreferExisting); Assert.That(target, Contains.Substring("\\id MAT - Test\r\n")); Assert.That(target, Contains.Substring("\\v 6 Verse 6 content.\r\n")); Assert.That(target, Contains.Substring("\\v 7 Text 7\r\n")); @@ -57,7 +57,7 @@ public void GetUsfm_PreferRows() (ScrRef("MAT 1:6"), "Text 6"), (ScrRef("MAT 1:7"), "Text 7"), }; - string target = UpdateUsfm(rows, preferExistingText: false); + string target = UpdateUsfm(rows, behavior: UpdateUsfmBehavior.PreferNew); Assert.That(target, Contains.Substring("\\id MAT - Test\r\n")); Assert.That(target, Contains.Substring("\\v 6 Text 6\r\n")); Assert.That(target, Contains.Substring("\\v 7 Text 7\r\n")); @@ -438,30 +438,18 @@ private static string UpdateUsfm( IReadOnlyList<(IReadOnlyList, string)>? rows = null, string? source = null, string? idText = null, - bool stripAllText = false, - bool preferExistingText = false + UpdateUsfmBehavior behavior = UpdateUsfmBehavior.PreferNew ) { if (source is null) { var updater = new FileParatextProjectTextUpdater(CorporaTestHelpers.UsfmTestProjectPath); - return updater.UpdateUsfm( - "MAT", - rows, - fullName: idText, - stripAllText: stripAllText, - preferExistingText: preferExistingText - ); + return updater.UpdateUsfm("MAT", rows, idText, behavior); } else { source = source.Trim().ReplaceLineEndings("\r\n") + "\r\n"; - var updater = new UpdateUsfmParserHandler( - rows, - idText, - stripAllText: stripAllText, - preferExistingText: preferExistingText - ); + var updater = new UpdateUsfmParserHandler(rows, idText, behavior); UsfmParser.Parse(source, updater); return updater.GetUsfm(); } diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs index 88773c858..b2dbdcf32 100644 --- a/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs @@ -49,7 +49,7 @@ string sfmFileName in Directory string bookId; if (!targetSettings.IsBookFileName(sfmFileName, out bookId)) continue; - string newUsfm = updater.UpdateUsfm(bookId, pretranslations, stripAllText: true, preferExistingText: false); + string newUsfm = updater.UpdateUsfm(bookId, pretranslations, behavior: UpdateUsfmBehavior.StripExisting); Assert.That(newUsfm, Is.Not.Null); } } @@ -150,8 +150,7 @@ async Task GetUsfmAsync(string projectPath) string newUsfm = updater.UpdateUsfm( bookId, pretranslations, - stripAllText: true, - preferExistingText: true + behavior: UpdateUsfmBehavior.StripExisting ); Assert.That(newUsfm, Is.Not.Null); }