Skip to content

Commit

Permalink
Move error-handling to updater base; use updater in manual test
Browse files: browse the repository at this point in the history
  • Loading branch information
Enkidu93 committed Aug 2, 2024
1 parent 54f0980 commit 9f431f9
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 47 deletions.
20 changes: 17 additions & 3 deletions src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
using System.Collections.Generic;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;

namespace SIL.Machine.Corpora
{
Expand Down Expand Up @@ -41,8 +43,20 @@ public string UpdateUsfm(
stripAllText,
preferExistingText: preferExistingText
);
UsfmParser.Parse(usfm, handler, _settings.Stylesheet, _settings.Versification);
return handler.GetUsfm(_settings.Stylesheet);
try
{
UsfmParser.Parse(usfm, handler, _settings.Stylesheet, _settings.Versification);
return handler.GetUsfm(_settings.Stylesheet);
}
catch (Exception ex)
{
var sb = new StringBuilder();
sb.Append($"An error occurred while parsing the usfm for '{bookId}`");
if (!string.IsNullOrEmpty(_settings.Name))
sb.Append($" in project '{_settings.Name}'");
sb.Append($". Error: '{ex.Message}'");
throw new InvalidOperationException(sb.ToString(), ex);
}
}

protected abstract bool Exists(string fileName);
Expand Down
19 changes: 3 additions & 16 deletions src/SIL.Machine/Corpora/UsfmParser.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
using System;
using System.Collections.Generic;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using SIL.Scripture;

Expand Down Expand Up @@ -43,19 +41,8 @@ public static void Parse(
versification,
preserveWhitespace
);
try
{
parser.ProcessTokens();
}
catch (Exception ex)
{
var sb = new StringBuilder();
sb.Append(
$"An error occurred while parsing the USFM text in Verse: {parser.State.VerseRef}, line: {parser.State.LineNumber}, "
);
sb.Append($"column: {parser.State.ColumnNumber}, error: '{ex.Message}'");
throw new InvalidOperationException(sb.ToString(), ex);
}

parser.ProcessTokens();
}

private static readonly Regex OptBreakSplitter = new Regex("(//)", RegexOptions.Compiled);
Expand Down
67 changes: 39 additions & 28 deletions tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ public class UsfmManualTests
{
[Test]
[Ignore("This is for manual testing only. Remove this tag to run the test.")]
public async Task ParseParallelCorpusAsync()
public void ParseParallelCorpusAsync()
{
ParatextTextCorpus tCorpus =
new(projectDir: CorporaTestHelpers.UsfmTargetProjectPath, includeAllText: true, includeMarkers: true);
Expand All @@ -36,18 +36,20 @@ public async Task ParseParallelCorpusAsync()
ParatextProjectSettings targetSettings = new FileParatextProjectSettingsParser(
CorporaTestHelpers.UsfmTargetProjectPath
).Parse();

var updater = new FileParatextProjectTextUpdater(CorporaTestHelpers.UsfmTargetProjectPath);
foreach (
string sfmFileName in Directory.EnumerateFiles(
CorporaTestHelpers.UsfmTargetProjectPath,
$"{targetSettings.FileNamePrefix}*{targetSettings.FileNameSuffix}"
)
string sfmFileName in Directory
.EnumerateFiles(
CorporaTestHelpers.UsfmTargetProjectPath,
$"{targetSettings.FileNamePrefix}*{targetSettings.FileNameSuffix}"
)
.Select(path => new DirectoryInfo(path).Name)
)
{
var updater = new UpdateUsfmParserHandler(pretranslations, stripAllText: true, preferExistingText: false);
string usfm = await File.ReadAllTextAsync(sfmFileName);
UsfmParser.Parse(usfm, updater, targetSettings.Stylesheet, targetSettings.Versification);
string newUsfm = updater.GetUsfm(targetSettings.Stylesheet);
string bookId;
if (!targetSettings.IsBookFileName(sfmFileName, out bookId))
continue;
string newUsfm = updater.UpdateUsfm(bookId, pretranslations, stripAllText: true, preferExistingText: false);
Assert.That(newUsfm, Is.Not.Null);
}
}
Expand Down Expand Up @@ -105,43 +107,52 @@ async Task GetUsfmAsync(string projectPath)
)
)
.ToArrayAsync();
List<string> sfmTexts = [];
List<string> bookIds = [];
ParatextProjectTextUpdaterBase updater;
if (projectArchive == null)
{
sfmTexts = (
await Task.WhenAll(
Directory
.EnumerateFiles(projectPath, $"{settings.FileNamePrefix}*{settings.FileNameSuffix}")
.Select(async sfmFileName => await File.ReadAllTextAsync(sfmFileName))
)
bookIds = (
Directory
.EnumerateFiles(projectPath, $"{settings.FileNamePrefix}*{settings.FileNameSuffix}")
.Select(path => new DirectoryInfo(path).Name)
.Select(filename =>
{
string bookId;
if (settings.IsBookFileName(filename, out bookId))
return bookId;
else
return "";
})
.Where(id => id != "")
).ToList();
updater = new FileParatextProjectTextUpdater(projectPath);
}
else
{
sfmTexts = projectArchive
bookIds = projectArchive
.Entries.Where(e =>
e.Name.StartsWith(settings.FileNamePrefix) && e.Name.EndsWith(settings.FileNameSuffix)
)
.Select(e =>
{
string contents;
using (var sr = new StreamReader(e.Open()))
{
contents = sr.ReadToEnd();
}
return contents;
string bookId;
if (settings.IsBookFileName(e.Name, out bookId))
return bookId;
else
return "";
})
.Where(id => id != "")
.ToList();
updater = new ZipParatextProjectTextUpdater(projectArchive);
}
foreach (string usfm in sfmTexts)
foreach (string bookId in bookIds)
{
var updater = new UpdateUsfmParserHandler(
string newUsfm = updater.UpdateUsfm(
bookId,
pretranslations,
stripAllText: true,
preferExistingText: true
);
UsfmParser.Parse(usfm, updater, settings.Stylesheet, settings.Versification);
string newUsfm = updater.GetUsfm(settings.Stylesheet);
Assert.That(newUsfm, Is.Not.Null);
}
}
Expand Down

0 comments on commit 9f431f9

Please sign in to comment.