diff --git a/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs b/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs index 44d280f8..df6c6c7b 100644 --- a/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs +++ b/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs @@ -1,34 +1,34 @@ using System.Collections.Generic; -using System.IO; using System.IO.Compression; +using System.Linq; namespace SIL.Machine.Corpora { - public class ParatextBackupTermsCorpus : ParatextTermsCorpusBase + public class ParatextBackupTermsCorpus : DictionaryTextCorpus { - private readonly ZipArchive _archive; - public ParatextBackupTermsCorpus( - ZipArchive archive, + string fileName, IEnumerable termCategories, bool useTermGlosses = true ) { - _archive = archive; - AddTexts(new ZipParatextProjectSettingsParser(archive).Parse(), termCategories, useTermGlosses); - } - - protected override bool Exists(string fileName) - { - return _archive.GetEntry(fileName) != null; - } + using (var archive = ZipFile.OpenRead(fileName)) + { + ParatextProjectSettings settings = new ZipParatextProjectSettingsParser(archive).Parse(); + IEnumerable<(string, IEnumerable)> glosses = new ZipParatextTermsParser(archive).Parse( + settings, + termCategories, + useTermGlosses + ); + string textId = + $"{settings.BiblicalTermsListType}:{settings.BiblicalTermsProjectName}:{settings.BiblicalTermsFileName}"; - protected override Stream Open(string fileName) - { - ZipArchiveEntry entry = _archive.GetEntry(fileName); - if (entry == null) - return null; - return entry.Open(); + IText text = new MemoryText( + textId, + glosses.Select(kvp => new TextRow(textId, kvp.Item1) { Segment = kvp.Item2.ToList() }) + ); + AddText(text); + } } } } diff --git a/src/SIL.Machine/Corpora/ParatextTermsCorpusBase.cs b/src/SIL.Machine/Corpora/ParatextTermsParserBase.cs similarity index 90% rename from src/SIL.Machine/Corpora/ParatextTermsCorpusBase.cs rename to src/SIL.Machine/Corpora/ParatextTermsParserBase.cs index 4c86d34d..c58d2db8 100644 --- a/src/SIL.Machine/Corpora/ParatextTermsCorpusBase.cs +++ b/src/SIL.Machine/Corpora/ParatextTermsParserBase.cs @@ -9,7 +9,7 @@ namespace SIL.Machine.Corpora { - public abstract class ParatextTermsCorpusBase : DictionaryTextCorpus + public abstract class ParatextTermsParserBase { private static readonly List PredefinedTermsListTypes = new List() { @@ -34,7 +34,7 @@ public abstract class ParatextTermsCorpusBase : DictionaryTextCorpus private static readonly Regex ContentInBracketsRegex = new Regex(@"^\[(.+?)\]$", RegexOptions.Compiled); private static readonly Regex NumericalInformationRegex = new Regex(@"\s+\d+(\.\d+)*$", RegexOptions.Compiled); - protected void AddTexts( + public IEnumerable<(string, IEnumerable)> Parse( ParatextProjectSettings settings, IEnumerable termCategories, bool useTermGlosses = true @@ -144,7 +144,12 @@ protected void AddTexts( .ToDictionary(kvp => kvp.Item1, kvp => kvp.Item2); } if (termsGlosses.Count > 0 || termsRenderings.Count > 0) - AddTerms(termsRenderings, termsGlosses, settings); + { + return termsRenderings + .Concat(termsGlosses.Where(kvp => !termsRenderings.ContainsKey(kvp.Key))) + .Select(kvp => (kvp.Key, kvp.Value)); + } + return new List<(string, IEnumerable)>(); } private static bool IsInCategory( @@ -158,26 +163,6 @@ IDictionary termIdToCategoryDictionary || (termIdToCategoryDictionary.TryGetValue(id, out category) && termCategories.Contains(category)); } - private void AddTerms( - IDictionary> termsRenderings, - IDictionary> termsGlosses, - ParatextProjectSettings settings - ) - { - string textId = - $"{settings.BiblicalTermsListType}:{settings.BiblicalTermsProjectName}:{settings.BiblicalTermsFileName}"; - - //Prefer renderings to gloss localizations - IDictionary> glosses = termsRenderings - .Concat(termsGlosses.Where(kvp => !termsRenderings.ContainsKey(kvp.Key))) - .ToDictionary(kvp => kvp.Key, kvp => kvp.Value); - IText text = new MemoryText( - textId, - glosses.Select(kvp => new TextRow(textId, kvp.Key) { Segment = kvp.Value.ToList() }) - ); - AddText(text); - } - public static IReadOnlyList GetGlosses(string gloss) { //If entire term rendering is surrounded in square brackets, remove them diff --git a/src/SIL.Machine/Corpora/ZipParatextTermsParser.cs b/src/SIL.Machine/Corpora/ZipParatextTermsParser.cs new file mode 100644 index 00000000..d41f2477 --- /dev/null +++ b/src/SIL.Machine/Corpora/ZipParatextTermsParser.cs @@ -0,0 +1,28 @@ +using System.IO; +using System.IO.Compression; + +namespace SIL.Machine.Corpora +{ + public class ZipParatextTermsParser : ParatextTermsParserBase + { + private readonly ZipArchive _archive; + + public ZipParatextTermsParser(ZipArchive archive) + { + _archive = archive; + } + + protected override bool Exists(string fileName) + { + return _archive.GetEntry(fileName) != null; + } + + protected override Stream Open(string fileName) + { + ZipArchiveEntry entry = _archive.GetEntry(fileName); + if (entry == null) + return null; + return entry.Open(); + } + } +} diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsCorpus.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsCorpus.cs deleted file mode 100644 index 92cd9b2c..00000000 --- a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsCorpus.cs +++ /dev/null @@ -1,31 +0,0 @@ -using System.Text; - -namespace SIL.Machine.Corpora; - -public class MemoryParatextProjectTermsCorpus : ParatextTermsCorpusBase -{ - public Dictionary Files { get; } - - public MemoryParatextProjectTermsCorpus( - ParatextProjectSettings settings, - IEnumerable termCategories, - Dictionary files, - bool useTermGlosses = true - ) - { - Files = files; - AddTexts(settings, termCategories, useTermGlosses); - } - - protected override bool Exists(string fileName) - { - return Files.ContainsKey(fileName); - } - - protected override Stream? Open(string fileName) - { - if (!Files.TryGetValue(fileName, out string? contents)) - return null; - return new MemoryStream(Encoding.UTF8.GetBytes(contents)); - } -} diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryProjectTermsParser.cs b/tests/SIL.Machine.Tests/Corpora/MemoryProjectTermsParser.cs new file mode 100644 index 00000000..d4bfc594 --- /dev/null +++ b/tests/SIL.Machine.Tests/Corpora/MemoryProjectTermsParser.cs @@ -0,0 +1,20 @@ +using System.Text; + +namespace SIL.Machine.Corpora; + +public class MemoryParatextTermsParser(IDictionary files) : ParatextTermsParserBase +{ + public IDictionary Files { get; } = files; + + protected override bool Exists(string fileName) + { + return Files.ContainsKey(fileName); + } + + protected override Stream? Open(string fileName) + { + if (!Files.TryGetValue(fileName, out string? contents)) + return null; + return new MemoryStream(Encoding.UTF8.GetBytes(contents)); + } +} diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsCorpus.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsCorpus.cs new file mode 100644 index 00000000..740d17db --- /dev/null +++ b/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsCorpus.cs @@ -0,0 +1,26 @@ +namespace SIL.Machine.Corpora; + +public class ParatextProjectTermsCorpus : DictionaryTextCorpus +{ + public ParatextProjectTermsCorpus( + IDictionary files, + ParatextProjectSettings settings, + IEnumerable termCategories, + bool useTermGlosses = true + ) + { + IEnumerable<(string, IEnumerable)> glosses = new MemoryParatextTermsParser(files).Parse( + settings, + termCategories, + useTermGlosses + ); + string textId = + $"{settings.BiblicalTermsListType}:{settings.BiblicalTermsProjectName}:{settings.BiblicalTermsFileName}"; + + IText text = new MemoryText( + textId, + glosses.Select(kvp => new TextRow(textId, kvp.Item1) { Segment = kvp.Item2.ToList() }) + ); + AddText(text); + } +} diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsCorpusTests.cs b/tests/SIL.Machine.Tests/Corpora/ParatextTermsCorpusTests.cs similarity index 95% rename from tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsCorpusTests.cs rename to tests/SIL.Machine.Tests/Corpora/ParatextTermsCorpusTests.cs index 2ff223be..59a91f7c 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsCorpusTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/ParatextTermsCorpusTests.cs @@ -5,7 +5,7 @@ namespace SIL.Machine.Corpora; [TestFixture] -public class ParatextProjectTermsCorpusTests +public class ParatextTermsCorpusTests { [Test] public void TestGetKeyTermsFromTermsRenderings() @@ -144,7 +144,7 @@ public void TestGetKeyTermsFromTermsLocalizations_TermRenderingsExists_PreferLoc public void TestStripParens(string testString, string expectedOutput, char left = '(', char right = ')') { Assert.That( - ParatextTermsCorpusBase.StripParens(testString, left: left, right: right), + ParatextTermsParserBase.StripParens(testString, left: left, right: right), Is.EqualTo(expectedOutput) ); } @@ -159,7 +159,7 @@ public void TestStripParens(string testString, string expectedOutput, char left [TestCase("Ahasuerus, Xerxes; Assuerus", new string[] { "Ahasuerus", "Xerxes", "Assuerus" })] public void TestGetGlosses(string glossString, IReadOnlyList expectedOutput) { - Assert.That(ParatextTermsCorpusBase.GetGlosses(glossString), Is.EqualTo(expectedOutput)); + Assert.That(ParatextTermsParserBase.GetGlosses(glossString), Is.EqualTo(expectedOutput)); } private class TestEnvironment( @@ -168,11 +168,11 @@ private class TestEnvironment( bool useTermGlosses = true ) { - public MemoryParatextProjectTermsCorpus Corpus { get; } = - new MemoryParatextProjectTermsCorpus( + public ParatextProjectTermsCorpus Corpus { get; } = + new ParatextProjectTermsCorpus( + files ?? new(), settings ?? new DefaultParatextProjectSettings(), new string[] { "PN" }, - files ?? new(), useTermGlosses ); }