From f483d0517b259045bffd5b9c39b8662f84521ec6 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 7 Nov 2023 12:17:46 -0500 Subject: [PATCH] Fixes https://github.com/sillsdev/serval/issues/202 --- src/SIL.Machine.AspNetCore/Models/Corpus.cs | 2 ++ .../Services/NmtPreprocessBuildJob.cs | 7 +++++-- .../Services/ServalTranslationEngineServiceV1.cs | 2 ++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/SIL.Machine.AspNetCore/Models/Corpus.cs b/src/SIL.Machine.AspNetCore/Models/Corpus.cs index 84dda461f..c33bc52ce 100644 --- a/src/SIL.Machine.AspNetCore/Models/Corpus.cs +++ b/src/SIL.Machine.AspNetCore/Models/Corpus.cs @@ -5,7 +5,9 @@ public class Corpus public string Id { get; set; } = default!; public string SourceLanguage { get; set; } = default!; public string TargetLanguage { get; set; } = default!; + public bool TrainOnAll { get; set; } public bool PretranslateAll { get; set; } + public HashSet TrainOnTextIds { get; set; } = default!; public HashSet PretranslateTextIds { get; set; } = default!; public List SourceFiles { get; set; } = default!; public List TargetFiles { get; set; } = default!; diff --git a/src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs b/src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs index e1202dec8..3fbc41ee0 100644 --- a/src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs +++ b/src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs @@ -76,8 +76,11 @@ async IAsyncEnumerable ProcessRowsAsync() foreach (ParallelTextRow row in parallelCorpus) { - await sourceTrainWriter.WriteAsync($"{row.SourceText}\n"); - await targetTrainWriter.WriteAsync($"{row.TargetText}\n"); + if (corpus.TrainOnAll || corpus.TrainOnTextIds.Contains(row.TextId)) + { + await sourceTrainWriter.WriteAsync($"{row.SourceText}\n"); + await targetTrainWriter.WriteAsync($"{row.TargetText}\n"); + } if ( (corpus.PretranslateAll || corpus.PretranslateTextIds.Contains(row.TextId)) && row.SourceSegment.Count > 0 diff --git a/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs b/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs index f57038f01..a0b010c61 100644 --- a/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs +++ b/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs @@ -236,7 +236,9 @@ private static Models.Corpus Map(Serval.Translation.V1.Corpus source) Id = source.Id, SourceLanguage = source.SourceLanguage, TargetLanguage = source.TargetLanguage, + TrainOnAll = source.TrainOnAll, PretranslateAll = source.PretranslateAll, + TrainOnTextIds = source.TrainOnTextIds.ToHashSet(), PretranslateTextIds = source.PretranslateTextIds.ToHashSet(), SourceFiles = source.SourceFiles.Select(Map).ToList(), TargetFiles = source.TargetFiles.Select(Map).ToList()