Skip to content

Commit

Permalink
Test versification mismatch
Browse files Browse the repository at this point in the history
  • Loading branch information
ddaspit committed Aug 30, 2024
1 parent 7a819ed commit d8be703
Showing 1 changed file with 73 additions and 0 deletions.
73 changes: 73 additions & 0 deletions tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.IO.Compression;
using System.Text;
using System.Text.Json;
using NUnit.Framework;

Expand Down Expand Up @@ -169,4 +170,76 @@ async Task GetUsfmAsync(string projectPath)
await GetUsfmAsync(ParatextProjectPath);
}
}

/// <summary>
/// Aligns the "SUS" book of the source and target Paratext projects, re-parses the aligned
/// references against the target corpus's versification, regenerates the source project's USFM
/// from those rows, and checks that the expected verse 65 text is present in the output.
/// </summary>
[Test]
public void Test()
{
    const string bookId = "SUS";

    var source = new ParatextTextCorpus(CorporaTestHelpers.UsfmSourceProjectPath);
    var target = new ParatextTextCorpus(CorporaTestHelpers.UsfmTargetProjectPath);

    // The aligned rows carry their references as strings; turn them back into ScriptureRef
    // instances using the target corpus's versification.
    IReadOnlyList<ScriptureRef> ParseRefs(IReadOnlyList<string> refStrings)
    {
        return refStrings.Select(s => ScriptureRef.Parse(s, target.Versification)).ToArray();
    }

    // Deferred query: nothing is aligned until the rows are materialized for UpdateUsfm below.
    var orderedRows = AlignPretranslateCorpus(source.FilterTexts([bookId]), target.FilterTexts([bookId]))
        .Select(aligned => (Refs: ParseRefs(aligned.Refs), aligned.Translation))
        .OrderBy(aligned => aligned.Refs[0]);

    var usfmUpdater = new FileParatextProjectTextUpdater(CorporaTestHelpers.UsfmSourceProjectPath);
    string updatedUsfm = usfmUpdater.UpdateUsfm(
        bookId,
        orderedRows.ToArray(),
        stripAllText: true,
        preferExistingText: true
    );

    const string expectedVerse =
        "\\v 65 et rex Astyages adpositus est ad patres suos et suscepit Cyrus Perses regnum eius";
    Assert.That(updatedUsfm, Contains.Substring(expectedVerse));
}

/// <summary>
/// Aligns <paramref name="srcCorpus"/> with <paramref name="trgCorpus"/> (keeping all source rows)
/// and merges consecutive rows that fall inside the same target range into a single entry.
/// </summary>
/// <param name="srcCorpus">The source corpus whose text is collected into each entry.</param>
/// <param name="trgCorpus">The target corpus whose references and range flags drive the grouping.</param>
/// <returns>
/// A lazily evaluated sequence of entries. <c>Refs</c> holds the string form of every target
/// reference in the group; <c>Translation</c> holds the group's source text, with continuation
/// rows joined by a single space.
/// </returns>
private static IEnumerable<(IReadOnlyList<string> Refs, string Translation)> AlignPretranslateCorpus(
    ITextCorpus srcCorpus,
    ITextCorpus trgCorpus
)
{
    // Number of rows accumulated in the current, not-yet-emitted group.
    int rowCount = 0;
    StringBuilder srcSegBuffer = new();
    List<ScriptureRef> refs = [];
    foreach (ParallelTextRow row in srcCorpus.AlignRows(trgCorpus, allSourceRows: true))
    {
        if (!row.IsTargetRangeStart && row.IsTargetInRange)
        {
            // Continuation of the current target range: extend the pending group.
            refs.AddRange(row.TargetRefs.Cast<ScriptureRef>());
            if (row.SourceText.Length > 0)
            {
                // Only insert a separator when there is already text to separate from.
                if (srcSegBuffer.Length > 0)
                    srcSegBuffer.Append(' ');
                srcSegBuffer.Append(row.SourceText);
            }
            rowCount++;
        }
        else
        {
            // This row starts a new group, so emit the pending one first.
            if (rowCount > 0)
            {
                // Snapshot refs into an array before clearing; the list is reused for the next group.
                yield return (refs.Select(r => r.ToString()).ToArray(), srcSegBuffer.ToString());
                srcSegBuffer.Clear();
                refs.Clear();
                rowCount = 0;
            }

            refs.AddRange(row.TargetRefs.Cast<ScriptureRef>());
            srcSegBuffer.Append(row.SourceText);
            rowCount++;
        }
    }

    // Emit the final group, which no later row will flush.
    if (rowCount > 0)
    {
        yield return (refs.Select(r => r.ToString()).ToArray(), srcSegBuffer.ToString());
    }
}
}

0 comments on commit d8be703

Please sign in to comment.