diff --git a/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs b/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs index 605b08b2..44d280f8 100644 --- a/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs +++ b/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs @@ -11,11 +11,11 @@ public class ParatextBackupTermsCorpus : ParatextTermsCorpusBase public ParatextBackupTermsCorpus( ZipArchive archive, IEnumerable termCategories, - bool preferTermsLocalization = false + bool useTermGlosses = true ) { _archive = archive; - AddTexts(new ZipParatextProjectSettingsParser(archive).Parse(), termCategories, preferTermsLocalization); + AddTexts(new ZipParatextProjectSettingsParser(archive).Parse(), termCategories, useTermGlosses); } protected override bool Exists(string fileName) diff --git a/src/SIL.Machine/Corpora/ParatextTermsCorpusBase.cs b/src/SIL.Machine/Corpora/ParatextTermsCorpusBase.cs index 898086d4..4c86d34d 100644 --- a/src/SIL.Machine/Corpora/ParatextTermsCorpusBase.cs +++ b/src/SIL.Machine/Corpora/ParatextTermsCorpusBase.cs @@ -86,7 +86,7 @@ protected void AddTexts( if ( settings.LanguageCode != null && settings.BiblicalTermsListType == "Major" - && !SupportedLanguageTermsLocalizationXmls.TryGetValue(settings.LanguageCode, out string resourceName) + && SupportedLanguageTermsLocalizationXmls.TryGetValue(settings.LanguageCode, out string resourceName) ) { using (Stream keyTermsFile = Assembly.GetExecutingAssembly().GetManifestResourceStream(resourceName)) @@ -104,8 +104,7 @@ protected void AddTexts( } } - IDictionary> termsRenderings = - new Dictionary>(); + IDictionary> termsRenderings = new Dictionary>(); if (termRenderingsDoc != null) { termsRenderings = termRenderingsDoc @@ -120,10 +119,12 @@ protected void AddTexts( IReadOnlyList glosses = GetGlosses(gloss); return (id, glosses); }) + .GroupBy(kvp => kvp.Item1, kvp => kvp.Item2) //Handle duplicate term ids (which do exist) e.g. שִׁלֵּמִי + .Select(grouping => (grouping.Key, grouping.SelectMany(g => g))) .ToDictionary(kvp => kvp.Item1, kvp => kvp.Item2); } - IDictionary> termsGlosses = new Dictionary>(); + IDictionary> termsGlosses = new Dictionary>(); if (termsGlossesDoc != null && useTermGlosses) { termsGlosses = termsGlossesDoc @@ -134,10 +135,12 @@ protected void AddTexts( .Select(kvp => { string id = kvp.Item1.Replace("\n", " "); - string gloss = kvp.Item2.Element("Gloss").Value; + string gloss = kvp.Item2.Attribute("Gloss").Value; IReadOnlyList glosses = GetGlosses(gloss); return (id, glosses); }) + .GroupBy(kvp => kvp.Item1, kvp => kvp.Item2) + .Select(grouping => (grouping.Key, grouping.SelectMany(g => g))) .ToDictionary(kvp => kvp.Item1, kvp => kvp.Item2); } if (termsGlosses.Count > 0 || termsRenderings.Count > 0) @@ -156,8 +159,8 @@ IDictionary termIdToCategoryDictionary } private void AddTerms( - IDictionary> termsRenderings, - IDictionary> termsGlosses, + IDictionary> termsRenderings, + IDictionary> termsGlosses, ParatextProjectSettings settings ) { @@ -165,12 +168,12 @@ ParatextProjectSettings settings $"{settings.BiblicalTermsListType}:{settings.BiblicalTermsProjectName}:{settings.BiblicalTermsFileName}"; //Prefer renderings to gloss localizations - IDictionary> glosses = termsRenderings - .Concat(termsGlosses.Where(kvp => !termsGlosses.ContainsKey(kvp.Key))) + IDictionary> glosses = termsRenderings + .Concat(termsGlosses.Where(kvp => !termsRenderings.ContainsKey(kvp.Key))) .ToDictionary(kvp => kvp.Key, kvp => kvp.Value); IText text = new MemoryText( textId, - glosses.Select(kvp => new TextRow(textId, kvp.Key) { Segment = kvp.Value }) + glosses.Select(kvp => new TextRow(textId, kvp.Key) { Segment = kvp.Value.ToList() }) ); AddText(text); } diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsCorpus.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsCorpus.cs index 767228ec..92cd9b2c 100644 --- a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsCorpus.cs +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsCorpus.cs @@ -10,11 +10,11 @@ public MemoryParatextProjectTermsCorpus( ParatextProjectSettings settings, IEnumerable termCategories, Dictionary files, - bool preferTermsLocalization = false + bool useTermGlosses = true ) { Files = files; - AddTexts(settings, termCategories, preferTermsLocalization); + AddTexts(settings, termCategories, useTermGlosses); } protected override bool Exists(string fileName) diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsCorpusTests.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsCorpusTests.cs index d5c36112..2ff223be 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsCorpusTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsCorpusTests.cs @@ -50,13 +50,28 @@ public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings() new DefaultParatextProjectSettings( biblicalTermsListType: "Major", biblicalTermsFileName: "BiblicalTerms.xml" - ) + ), + useTermGlosses: true ); IList rows = env.Corpus.GetRows().ToList(); Assert.That(rows.Count, Is.EqualTo(5726)); Assert.That(string.Join(" ", rows.First().Segment), Is.EqualTo("Abagtha")); } + [Test] + public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings_DoNotUseTermGlosses() + { + var env = new TestEnvironment( + new DefaultParatextProjectSettings( + biblicalTermsListType: "Major", + biblicalTermsFileName: "BiblicalTerms.xml" + ), + useTermGlosses: false + ); + IList rows = env.Corpus.GetRows().ToList(); + Assert.That(rows.Count, Is.EqualTo(0)); + } + [Test] public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings_PreferLocalization() { @@ -65,7 +80,7 @@ public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings_PreferLocaliz biblicalTermsListType: "Major", biblicalTermsFileName: "BiblicalTerms.xml" ), - preferTermsLocalization: true + useTermGlosses: true ); IList rows = env.Corpus.GetRows().ToList(); Assert.That(rows.Count, Is.EqualTo(5726)); @@ -81,10 +96,10 @@ public void TestGetKeyTermsFromTermsLocalizations_() biblicalTermsFileName: "BiblicalTerms.xml", languageCode: "fr" ), - preferTermsLocalization: true + useTermGlosses: true ); IList rows = env.Corpus.GetRows().ToList(); - Assert.That(rows.Count, Is.EqualTo(5716)); + Assert.That(rows.Count, Is.EqualTo(5715)); Assert.That(string.Join(" ", rows.First().Segment), Is.EqualTo("Aaron")); } @@ -112,11 +127,12 @@ public void TestGetKeyTermsFromTermsLocalizations_TermRenderingsExists_PreferLoc " } }, - preferTermsLocalization: true + useTermGlosses: true ); IList rows = env.Corpus.GetRows().ToList(); Assert.That(rows.Count, Is.EqualTo(5726)); - Assert.That(string.Join(" ", rows.First().Segment), Is.EqualTo("Abagtha")); + Assert.That(string.Join(" ", rows.First().Segment), Is.EqualTo("Xerxes")); + Assert.That(string.Join(" ", rows[2].Segment), Is.EqualTo("Abi")); } [Test] @@ -149,7 +165,7 @@ public void TestGetGlosses(string glossString, IReadOnlyList expectedOut private class TestEnvironment( ParatextProjectSettings? settings = null, Dictionary? files = null, - bool preferTermsLocalization = false + bool useTermGlosses = true ) { public MemoryParatextProjectTermsCorpus Corpus { get; } = @@ -157,7 +173,7 @@ private class TestEnvironment( settings ?? new DefaultParatextProjectSettings(), new string[] { "PN" }, files ?? new(), - preferTermsLocalization + useTermGlosses ); }