Skip to content

Commit

Permalink
fixes from integration
Browse files Browse the repository at this point in the history
  • Loading branch information
johnml1135 committed Jun 7, 2024
1 parent 67f63b8 commit 054c9a6
Showing 1 changed file with 17 additions and 21 deletions.
38 changes: 17 additions & 21 deletions src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs
Original file line number | Diff line number | Diff line change
Expand Up @@ -149,7 +149,7 @@ CancellationToken cancellationToken
continue;
}

- Row[] trainRows = rows.Where(r => IsIncluded(r, corpus.TrainOnChapters)).Cast<Row>().ToArray();
+ Row[] trainRows = rows.Where(row => IsInTrain(row, corpus)).Cast<Row>().ToArray();
if (trainRows.Length > 0)
{
Row row = trainRows[0];
Expand Down Expand Up @@ -187,7 +187,7 @@ CancellationToken cancellationToken
foreach (Row row in AlignPretranslateCorpus(corpus, sourceTextCorpora[0], targetTextCorpus))
{
if (
- IsIncluded(row, corpus.PretranslateChapters)
+ IsInPretranslate(row, corpus)
&& row.SourceSegment.Length > 0
&& (row.TargetSegment.Length == 0 || !IsInTrain(row, corpus))
)
Expand Down Expand Up @@ -231,22 +231,24 @@ JobCompletionStatus completionStatus
}
}

- private static bool IsInTrain(Row row, Corpus corpus)
+ private static bool IsInTrain(Row? row, Corpus corpus)
{
return IsIncluded(row, corpus.TrainOnTextIds, corpus.TrainOnChapters);
}

- private static bool IsInPretranslate(Row row, Corpus corpus)
+ private static bool IsInPretranslate(Row? row, Corpus corpus)
{
return IsIncluded(row, corpus.PretranslateTextIds, corpus.PretranslateChapters);
}

private static bool IsIncluded(
- Row row,
+ Row? row,
IReadOnlySet<string>? textIds,
IReadOnlyDictionary<string, HashSet<int>>? chapters
)
{
+ if (row is null)
+ return false;
if (chapters is not null)
{
return row.Refs.Any(r => IsInChapters(chapters, r));
Expand All @@ -272,14 +274,11 @@ private static bool IsInChapters(IReadOnlyDictionary<string, HashSet<int>> bookC
ITextCorpus trgCorpus
)
{
- if (!corpus.TrainOnAll)
- {
- IEnumerable<string> textIds = corpus.TrainOnChapters is not null
- ? corpus.TrainOnChapters.Keys
- : corpus.TrainOnTextIds;
- srcCorpora = srcCorpora.Select(sc => sc.FilterTexts(textIds)).ToArray();
- trgCorpus = trgCorpus.FilterTexts(textIds);
- }
+ IEnumerable<string>? textIds = corpus.TrainOnChapters is not null
+ ? corpus.TrainOnChapters.Keys
+ : corpus.TrainOnTextIds;
+ srcCorpora = srcCorpora.Select(sc => sc.FilterTexts(textIds)).ToArray();
+ trgCorpus = trgCorpus.FilterTexts(textIds);

if (trgCorpus.IsScripture())
{
Expand Down Expand Up @@ -396,14 +395,11 @@ ITextCorpus trgCorpus

private static IEnumerable<Row> AlignPretranslateCorpus(Corpus corpus, ITextCorpus srcCorpus, ITextCorpus trgCorpus)
{
- if (!corpus.PretranslateAll)
- {
- IEnumerable<string> textIds = corpus.PretranslateChapters is not null
- ? corpus.PretranslateChapters.Keys
- : corpus.PretranslateTextIds;
- srcCorpus = srcCorpus.FilterTexts(textIds);
- trgCorpus = trgCorpus.FilterTexts(textIds);
- }
+ IEnumerable<string>? textIds = corpus.PretranslateChapters is not null
+ ? corpus.PretranslateChapters.Keys
+ : corpus.PretranslateTextIds;
+ srcCorpus = srcCorpus.FilterTexts(textIds);
+ trgCorpus = trgCorpus.FilterTexts(textIds);

int rowCount = 0;
StringBuilder srcSegBuffer = new();
Expand Down

0 comments on commit 054c9a6

Please sign in to comment.