// Members added to tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs.
// The same change adds `using System.Text;` to that file's using directives (needed for StringBuilder).
// NOTE(review): the generic type arguments were missing from the flattened text these members were
// recovered from; ScriptureRef is inferred from the ScriptureRef.Parse(...) projection below - confirm
// against the real file.

/// <summary>
/// Aligns the "SUS" book of the source and target test projects, hands the aligned rows to a
/// <see cref="UsfmTextUpdater"/>, runs the source book's USFM through it, and checks that USFM
/// comes back out.
/// </summary>
[Test]
public async Task Test()
{
    const string bookId = "SUS";

    // The target project's settings are parsed as well, although nothing below reads them.
    _ = new FileParatextProjectSettingsParser(CorporaTestHelpers.UsfmTargetProjectPath).Parse();

    FileParatextProjectSettingsParser sourceSettingsParser = new(CorporaTestHelpers.UsfmSourceProjectPath);
    ParatextProjectSettings sourceSettings = sourceSettingsParser.Parse();

    var sourceCorpus = new ParatextTextCorpus(CorporaTestHelpers.UsfmSourceProjectPath);
    var targetCorpus = new ParatextTextCorpus(CorporaTestHelpers.UsfmTargetProjectPath);

    var rows = AlignPretranslateCorpus(sourceCorpus.FilterTexts([bookId]), targetCorpus.FilterTexts([bookId]))
        .ToList();

    var updater = new UsfmTextUpdater(rows, stripAllText: true, preferExistingText: true);
    string usfm = await File.ReadAllTextAsync(
        Path.Combine(CorporaTestHelpers.UsfmSourceProjectPath, sourceSettings.GetBookFileName(bookId))
    );
    UsfmParser.Parse(usfm, updater, sourceSettings.Stylesheet, sourceSettings.Versification);
    string newUsfm = updater.GetUsfm(sourceSettings.Stylesheet);

    // Without an assertion the test could only fail by throwing.
    Assert.That(newUsfm, Is.Not.Empty);
}

/// <summary>
/// Walks <paramref name="srcCorpus"/> aligned against <paramref name="trgCorpus"/> (keeping all source
/// rows) and merges each run of rows into one segment. A row that is inside a target range without
/// starting it is appended to the pending segment; any other row closes the pending segment and
/// starts a new one.
/// </summary>
/// <param name="srcCorpus">The corpus whose rows drive the alignment and supply the segment text.</param>
/// <param name="trgCorpus">
/// The corpus aligned against; its <c>Versification</c> is used to re-parse every reference.
/// </param>
/// <returns>
/// A lazily produced sequence with one tuple per segment: the segment's references, re-parsed from
/// their string form with the target versification, and the segment's source text. Source texts
/// appended to a pending segment are separated by a single space, and empty ones are skipped.
/// </returns>
private static IEnumerable<(IReadOnlyList<ScriptureRef>, string)> AlignPretranslateCorpus(
    ITextCorpus srcCorpus,
    ITextCorpus trgCorpus
)
{
    StringBuilder srcSegBuffer = new();
    List<ScriptureRef> refs = [];
    int rowCount = 0;

    // Snapshots the pending segment. ToArray() and ToString() copy the data, so the buffers can be
    // cleared right after the tuple is built.
    (IReadOnlyList<ScriptureRef>, string) BuildSegment() =>
        (
            refs.Select(r => ScriptureRef.Parse(r.ToString(), trgCorpus.Versification)).ToArray(),
            srcSegBuffer.ToString()
        );

    foreach (ParallelTextRow row in srcCorpus.AlignRows(trgCorpus, allSourceRows: true))
    {
        bool continuesTargetRange = !row.IsTargetRangeStart && row.IsTargetInRange;
        if (continuesTargetRange)
        {
            // Extend the pending segment with this row.
            refs.AddRange(row.Refs.Cast<ScriptureRef>());
            if (row.SourceText.Length > 0)
            {
                if (srcSegBuffer.Length > 0)
                    srcSegBuffer.Append(' ');
                srcSegBuffer.Append(row.SourceText);
            }
            rowCount++;
            continue;
        }

        // This row begins a new segment: emit whatever was pending, then start over.
        if (rowCount > 0)
        {
            yield return BuildSegment();
            srcSegBuffer.Clear();
            refs.Clear();
            rowCount = 0;
        }

        refs.AddRange(row.Refs.Cast<ScriptureRef>());
        srcSegBuffer.Append(row.SourceText);
        rowCount++;
    }

    // Emit the final segment, if the corpus produced any rows at all.
    if (rowCount > 0)
    {
        yield return BuildSegment();
    }
}