Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend manual test to process multiple projects including zipped projects #227

Merged
merged 5 commits into from
Jul 16, 2024
Merged
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 82 additions & 30 deletions tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.Text.Json;
using System.IO.Compression;
using System.Text.Json;
using NUnit.Framework;

namespace SIL.Machine.Corpora;
Expand Down Expand Up @@ -66,40 +67,91 @@ public record PretranslationDto

[Test]
[Ignore("This is for manual testing only. Remove this tag to run the test.")]
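/*
To run this test against specific projects, place the Paratext projects or zipped Paratext projects in the Corpora/TestData/project/ folder.
To test only one project, you can instead place that project in the Corpora/TestData/ folder and rename it to "project".
NOTE(review): when several projects are processed, the entries are listed with Directory.EnumerateFiles, which returns files only - confirm that unzipped project folders placed here are actually picked up.
*/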
public async Task CreateUsfmFile()
{
FileParatextProjectSettingsParser parser = new(ParatextProjectPath);
ParatextProjectSettings settings = parser.Parse();
async Task GetUsfmAsync(string projectPath)
{
ParatextProjectSettingsParserBase parser;
ZipArchive? projectArchive = null;
try
{
projectArchive = ZipFile.Open(projectPath, ZipArchiveMode.Read);
parser = new ZipParatextProjectSettingsParser(projectArchive);
}
catch (UnauthorizedAccessException)
{
parser = new FileParatextProjectSettingsParser(projectPath);
}
ParatextProjectSettings settings = parser.Parse();

// Read text from pretranslations file
using Stream pretranslationStream = File.OpenRead(PretranslationPath);
(IReadOnlyList<ScriptureRef>, string)[] pretranslations = await JsonSerializer
.DeserializeAsyncEnumerable<PretranslationDto>(
pretranslationStream,
new JsonSerializerOptions { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }
)
.Select(p =>
(
(IReadOnlyList<ScriptureRef>)(
p?.Refs.Select(r => ScriptureRef.Parse(r, settings.Versification).ToRelaxed()).ToArray() ?? []
),
p?.Translation ?? ""
// Read text from pretranslations file
using Stream pretranslationStream = File.OpenRead(PretranslationPath);
(IReadOnlyList<ScriptureRef>, string)[] pretranslations = await JsonSerializer
.DeserializeAsyncEnumerable<PretranslationDto>(
pretranslationStream,
new JsonSerializerOptions { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }
)
)
.ToArrayAsync();

foreach (
string sfmFileName in Directory.EnumerateFiles(
ParatextProjectPath,
$"{settings.FileNamePrefix}*{settings.FileNameSuffix}"
)
)
.Select(p =>
(
(IReadOnlyList<ScriptureRef>)(
p?.Refs.Select(r => ScriptureRef.Parse(r, settings.Versification).ToRelaxed()).ToArray()
?? []
),
p?.Translation ?? ""
)
)
.ToArrayAsync();
List<string> sfmTexts = [];
if (projectArchive == null)
{
sfmTexts = (
await Task.WhenAll(
Directory
.EnumerateFiles(projectPath, $"{settings.FileNamePrefix}*{settings.FileNameSuffix}")
.Select(async sfmFileName => await File.ReadAllTextAsync(sfmFileName))
)
).ToList();
}
else
{
sfmTexts = projectArchive
.Entries.Where(e =>
e.Name.StartsWith(settings.FileNamePrefix) && e.Name.EndsWith(settings.FileNameSuffix)
)
.Select(e =>
{
string contents;
using (var sr = new StreamReader(e.Open()))
{
contents = sr.ReadToEnd();
}
return contents;
})
.ToList();
}
foreach (string usfm in sfmTexts)
{
var updater = new UsfmTextUpdater(pretranslations, stripAllText: true, preferExistingText: true);
UsfmParser.Parse(usfm, updater, settings.Stylesheet, settings.Versification);
string newUsfm = updater.GetUsfm(settings.Stylesheet);
Assert.That(newUsfm, Is.Not.Null);
}
}
if (!File.Exists(Path.Combine(ParatextProjectPath, "Settings.xml")))
{
var updater = new UsfmTextUpdater(pretranslations, stripAllText: true, preferExistingText: true);
string usfm = await File.ReadAllTextAsync(sfmFileName);
UsfmParser.Parse(usfm, updater, settings.Stylesheet, settings.Versification);
string newUsfm = updater.GetUsfm(settings.Stylesheet);
Assert.That(newUsfm, Is.Not.Null);
Assert.Multiple(() =>
{
foreach (string subdir in Directory.EnumerateFiles(ParatextProjectPath))
Assert.DoesNotThrowAsync(async () => await GetUsfmAsync(subdir), $"Failed to parse {subdir}");
});
}
else
{
await GetUsfmAsync(ParatextProjectPath);
}
}
}
Loading