Skip to content

Commit

Permalink
Recreate the USFM text updating issue in unit tests. Make USFM updating errors more explicit: sillsdev/serval#398
Browse files Browse the repository at this point in the history
  • Loading branch information
johnml1135 committed Jun 3, 2024
1 parent a19d577 commit 5aca0ac
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 10 deletions.
17 changes: 15 additions & 2 deletions src/SIL.Machine/Corpora/UsfmParser.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
using System.Collections.Generic;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using SIL.Scripture;

Expand Down Expand Up @@ -41,7 +43,18 @@ public static void Parse(
versification,
preserveWhitespace
);
parser.ProcessTokens();
try
{
    parser.ProcessTokens();
}
catch (Exception error)
{
    // Re-throw with the parser's current verse reference and offset so the failing
    // location is reported; the original exception is preserved as the inner exception.
    string message =
        $"An error occurred while parsing the USFM text Verse: {parser.State.VerseRef}, offset: {parser.State.VerseOffset}, error: '{error.Message}'";
    throw new InvalidOperationException(message, error);
}
}

private static readonly Regex OptBreakSplitter = new Regex("(//)", RegexOptions.Compiled);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
\p
\v 6 Chapter two, verse \w six|strong="12345" \w*.
\p
\v 6 Bad verse.
\v 6 Bad verse. \x - \xo abc\xt 123\x* and more content.
\p
\v 5 Chapter two, verse five \rq (MAT 3:1)\rq*.
\v 7a Chapter two, verse seven A,
Expand Down
10 changes: 5 additions & 5 deletions tests/SIL.Machine.Tests/Corpora/UsfmFileTextTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ public void GetRows_NonEmptyText_AllText()

IText text = corpus["MAT"];
TextRow[] rows = text.GetRows().ToArray();
Assert.That(rows, Has.Length.EqualTo(49));
Assert.That(rows, Has.Length.EqualTo(50));

Assert.That(rows[0].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/1:h", corpus.Versification)));
Assert.That(rows[0].Text, Is.EqualTo("Matthew"));
Expand Down Expand Up @@ -132,11 +132,11 @@ public void GetRows_NonEmptyText_AllText()
Assert.That(rows[30].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:3/1:esb/2:p", corpus.Versification)));
Assert.That(rows[30].Text, Is.EqualTo("Here is some sidebar content."));

Assert.That(rows[36].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:7a/1:s", corpus.Versification)));
Assert.That(rows[36].Text, Is.EqualTo("Section header"));
Assert.That(rows[37].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:7a/1:s", corpus.Versification)));
Assert.That(rows[37].Text, Is.EqualTo("Section header"));

Assert.That(rows[43].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:12/1:restore", corpus.Versification)));
Assert.That(rows[43].Text, Is.EqualTo("restore information"));
Assert.That(rows[44].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:12/1:restore", corpus.Versification)));
Assert.That(rows[44].Text, Is.EqualTo("restore information"));
}

[Test]
Expand Down
26 changes: 25 additions & 1 deletion tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ public class UsfmManualTests
{
[Test]
[Ignore("This is for manual testing only. Remove this tag to run the test.")]
public void ParseParallelCorpus()
public async Task ParseParallelCorpusAsync()
{
ParatextTextCorpus tCorpus =
new(projectDir: CorporaTestHelpers.UsfmTargetProjectPath, includeAllText: true, includeMarkers: true);
Expand All @@ -25,6 +25,30 @@ public void ParseParallelCorpus()

List<ParallelTextRow> rows = pCorpus.GetRows().ToList();
Assert.That(rows, Has.Count.GreaterThan(0));

// insert the source into the target as pretranslations to make sure that USFM generation works
IReadOnlyList<(IReadOnlyList<ScriptureRef>, string)> pretranslations = rows.Select(r =>
((IReadOnlyList<ScriptureRef>)r.SourceRefs.Select(s => (ScriptureRef)s).ToList(), r.SourceText)
)
.ToList();

ParatextProjectSettings targetSettings = new FileParatextProjectSettingsParser(
CorporaTestHelpers.UsfmTargetProjectPath
).Parse();

foreach (
string sfmFileName in Directory.EnumerateFiles(
CorporaTestHelpers.UsfmTargetProjectPath,
$"{targetSettings.FileNamePrefix}*{targetSettings.FileNameSuffix}"
)
)
{
var updater = new UsfmTextUpdater(pretranslations, stripAllText: true, preferExistingText: false);
string usfm = await File.ReadAllTextAsync(sfmFileName);
UsfmParser.Parse(usfm, updater, targetSettings.Stylesheet, targetSettings.Versification);
string newUsfm = updater.GetUsfm(targetSettings.Stylesheet);
Assert.That(newUsfm, Is.Not.Null);
}
}

public record PretranslationDto
Expand Down
2 changes: 1 addition & 1 deletion tests/SIL.Machine.Tests/Corpora/UsfmTokenizerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public void Tokenize()
string usfm = ReadUsfm();
var tokenizer = new UsfmTokenizer();
IReadOnlyList<UsfmToken> tokens = tokenizer.Tokenize(usfm);
Assert.That(tokens, Has.Count.EqualTo(218));
Assert.That(tokens, Has.Count.EqualTo(225));

Assert.That(tokens[0].Type, Is.EqualTo(UsfmTokenType.Book));
Assert.That(tokens[0].Marker, Is.EqualTo("id"));
Expand Down

0 comments on commit 5aca0ac

Please sign in to comment.