diff --git a/src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs b/src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs index 2b78a0fec..a7f86e20d 100644 --- a/src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs +++ b/src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs @@ -149,7 +149,7 @@ CancellationToken cancellationToken continue; } - Row[] trainRows = rows.Where(r => IsIncluded(r, corpus.TrainOnChapters)).Cast().ToArray(); + Row[] trainRows = rows.Where(row => IsInTrain(row, corpus)).Cast().ToArray(); if (trainRows.Length > 0) { Row row = trainRows[0]; @@ -187,7 +187,7 @@ CancellationToken cancellationToken foreach (Row row in AlignPretranslateCorpus(corpus, sourceTextCorpora[0], targetTextCorpus)) { if ( - IsIncluded(row, corpus.PretranslateChapters) + IsInPretranslate(row, corpus) && row.SourceSegment.Length > 0 && (row.TargetSegment.Length == 0 || !IsInTrain(row, corpus)) ) @@ -231,22 +231,24 @@ JobCompletionStatus completionStatus } } - private static bool IsInTrain(Row row, Corpus corpus) + private static bool IsInTrain(Row? row, Corpus corpus) { return IsIncluded(row, corpus.TrainOnTextIds, corpus.TrainOnChapters); } - private static bool IsInPretranslate(Row row, Corpus corpus) + private static bool IsInPretranslate(Row? row, Corpus corpus) { return IsIncluded(row, corpus.PretranslateTextIds, corpus.PretranslateChapters); } private static bool IsIncluded( - Row row, + Row? row, IReadOnlySet? textIds, IReadOnlyDictionary>? chapters ) { + if (row is null) + return false; if (chapters is not null) { return row.Refs.Any(r => IsInChapters(chapters, r)); @@ -272,14 +274,11 @@ private static bool IsInChapters(IReadOnlyDictionary> bookC ITextCorpus trgCorpus ) { - if (!corpus.TrainOnAll) - { - IEnumerable textIds = corpus.TrainOnChapters is not null - ? corpus.TrainOnChapters.Keys - : corpus.TrainOnTextIds; - srcCorpora = srcCorpora.Select(sc => sc.FilterTexts(textIds)).ToArray(); - trgCorpus = trgCorpus.FilterTexts(textIds); - } + IEnumerable? textIds = corpus.TrainOnChapters is not null + ? corpus.TrainOnChapters.Keys + : corpus.TrainOnTextIds; + srcCorpora = srcCorpora.Select(sc => sc.FilterTexts(textIds)).ToArray(); + trgCorpus = trgCorpus.FilterTexts(textIds); if (trgCorpus.IsScripture()) { @@ -396,14 +395,11 @@ ITextCorpus trgCorpus private static IEnumerable AlignPretranslateCorpus(Corpus corpus, ITextCorpus srcCorpus, ITextCorpus trgCorpus) { - if (!corpus.PretranslateAll) - { - IEnumerable textIds = corpus.PretranslateChapters is not null - ? corpus.PretranslateChapters.Keys - : corpus.PretranslateTextIds; - srcCorpus = srcCorpus.FilterTexts(textIds); - trgCorpus = trgCorpus.FilterTexts(textIds); - } + IEnumerable? textIds = corpus.PretranslateChapters is not null + ? corpus.PretranslateChapters.Keys + : corpus.PretranslateTextIds; + srcCorpus = srcCorpus.FilterTexts(textIds); + trgCorpus = trgCorpus.FilterTexts(textIds); int rowCount = 0; StringBuilder srcSegBuffer = new();