From 9f431f95cbd4740ab58c629fbad21fe29a75b690 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Fri, 2 Aug 2024 08:44:07 -0400 Subject: [PATCH] Move error-handling to updater base; use updater in manual test --- .../Corpora/ParatextProjectTextUpdaterBase.cs | 20 +++++- src/SIL.Machine/Corpora/UsfmParser.cs | 19 +----- .../Corpora/UsfmManualTests.cs | 67 +++++++++++-------- 3 files changed, 59 insertions(+), 47 deletions(-) diff --git a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs index 74224040..07c2ca6c 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs @@ -1,5 +1,7 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.IO; +using System.Text; namespace SIL.Machine.Corpora { @@ -41,8 +43,20 @@ public string UpdateUsfm( stripAllText, preferExistingText: preferExistingText ); - UsfmParser.Parse(usfm, handler, _settings.Stylesheet, _settings.Versification); - return handler.GetUsfm(_settings.Stylesheet); + try + { + UsfmParser.Parse(usfm, handler, _settings.Stylesheet, _settings.Versification); + return handler.GetUsfm(_settings.Stylesheet); + } + catch (Exception ex) + { + var sb = new StringBuilder(); + sb.Append($"An error occurred while parsing the usfm for '{bookId}`"); + if (!string.IsNullOrEmpty(_settings.Name)) + sb.Append($" in project '{_settings.Name}'"); + sb.Append($". Error: '{ex.Message}'"); + throw new InvalidOperationException(sb.ToString(), ex); + } } protected abstract bool Exists(string fileName); diff --git a/src/SIL.Machine/Corpora/UsfmParser.cs b/src/SIL.Machine/Corpora/UsfmParser.cs index 108f12be..8028b2fa 100644 --- a/src/SIL.Machine/Corpora/UsfmParser.cs +++ b/src/SIL.Machine/Corpora/UsfmParser.cs @@ -1,7 +1,5 @@ -using System; -using System.Collections.Generic; +using System.Collections.Generic; using System.Linq; -using System.Text; using System.Text.RegularExpressions; using SIL.Scripture; @@ -43,19 +41,8 @@ public static void Parse( versification, preserveWhitespace ); - try - { - parser.ProcessTokens(); - } - catch (Exception ex) - { - var sb = new StringBuilder(); - sb.Append( - $"An error occurred while parsing the USFM text in Verse: {parser.State.VerseRef}, line: {parser.State.LineNumber}, " - ); - sb.Append($"column: {parser.State.ColumnNumber}, error: '{ex.Message}'"); - throw new InvalidOperationException(sb.ToString(), ex); - } + + parser.ProcessTokens(); } private static readonly Regex OptBreakSplitter = new Regex("(//)", RegexOptions.Compiled); diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs index 833b71a1..88773c85 100644 --- a/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs @@ -9,7 +9,7 @@ public class UsfmManualTests { [Test] [Ignore("This is for manual testing only. Remove this tag to run the test.")] - public async Task ParseParallelCorpusAsync() + public void ParseParallelCorpusAsync() { ParatextTextCorpus tCorpus = new(projectDir: CorporaTestHelpers.UsfmTargetProjectPath, includeAllText: true, includeMarkers: true); @@ -36,18 +36,20 @@ public async Task ParseParallelCorpusAsync() ParatextProjectSettings targetSettings = new FileParatextProjectSettingsParser( CorporaTestHelpers.UsfmTargetProjectPath ).Parse(); - + var updater = new FileParatextProjectTextUpdater(CorporaTestHelpers.UsfmTargetProjectPath); foreach ( - string sfmFileName in Directory.EnumerateFiles( - CorporaTestHelpers.UsfmTargetProjectPath, - $"{targetSettings.FileNamePrefix}*{targetSettings.FileNameSuffix}" - ) + string sfmFileName in Directory + .EnumerateFiles( + CorporaTestHelpers.UsfmTargetProjectPath, + $"{targetSettings.FileNamePrefix}*{targetSettings.FileNameSuffix}" + ) + .Select(path => new DirectoryInfo(path).Name) ) { - var updater = new UpdateUsfmParserHandler(pretranslations, stripAllText: true, preferExistingText: false); - string usfm = await File.ReadAllTextAsync(sfmFileName); - UsfmParser.Parse(usfm, updater, targetSettings.Stylesheet, targetSettings.Versification); - string newUsfm = updater.GetUsfm(targetSettings.Stylesheet); + string bookId; + if (!targetSettings.IsBookFileName(sfmFileName, out bookId)) + continue; + string newUsfm = updater.UpdateUsfm(bookId, pretranslations, stripAllText: true, preferExistingText: false); Assert.That(newUsfm, Is.Not.Null); } } @@ -105,43 +107,52 @@ async Task GetUsfmAsync(string projectPath) ) ) .ToArrayAsync(); - List sfmTexts = []; + List bookIds = []; + ParatextProjectTextUpdaterBase updater; if (projectArchive == null) { - sfmTexts = ( - await Task.WhenAll( - Directory - .EnumerateFiles(projectPath, $"{settings.FileNamePrefix}*{settings.FileNameSuffix}") - .Select(async sfmFileName => await File.ReadAllTextAsync(sfmFileName)) - ) + bookIds = ( + Directory + .EnumerateFiles(projectPath, $"{settings.FileNamePrefix}*{settings.FileNameSuffix}") + .Select(path => new DirectoryInfo(path).Name) + .Select(filename => + { + string bookId; + if (settings.IsBookFileName(filename, out bookId)) + return bookId; + else + return ""; + }) + .Where(id => id != "") ).ToList(); + updater = new FileParatextProjectTextUpdater(projectPath); } else { - sfmTexts = projectArchive + bookIds = projectArchive .Entries.Where(e => e.Name.StartsWith(settings.FileNamePrefix) && e.Name.EndsWith(settings.FileNameSuffix) ) .Select(e => { - string contents; - using (var sr = new StreamReader(e.Open())) - { - contents = sr.ReadToEnd(); - } - return contents; + string bookId; + if (settings.IsBookFileName(e.Name, out bookId)) + return bookId; + else + return ""; }) + .Where(id => id != "") .ToList(); + updater = new ZipParatextProjectTextUpdater(projectArchive); } - foreach (string usfm in sfmTexts) + foreach (string bookId in bookIds) { - var updater = new UpdateUsfmParserHandler( + string newUsfm = updater.UpdateUsfm( + bookId, pretranslations, stripAllText: true, preferExistingText: true ); - UsfmParser.Parse(usfm, updater, settings.Stylesheet, settings.Versification); - string newUsfm = updater.GetUsfm(settings.Stylesheet); Assert.That(newUsfm, Is.Not.Null); } }