diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs b/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs
index 13785191..470817cc 100644
--- a/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs
+++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs
@@ -121,7 +121,7 @@ public async Task RunAsync_EnableKeyTerms()
             Assert.That(src1Count, Is.EqualTo(14));
             Assert.That(src2Count, Is.EqualTo(0));
             Assert.That(trgCount, Is.EqualTo(1));
-            Assert.That(termCount, Is.EqualTo(166));
+            Assert.That(termCount, Is.EqualTo(144));
         });
     }
 
diff --git a/src/ServiceToolkit/src/SIL.ServiceToolkit/Services/ParallelCorpusPreprocessingService.cs b/src/ServiceToolkit/src/SIL.ServiceToolkit/Services/ParallelCorpusPreprocessingService.cs
index 71769985..7ef4d67c 100644
--- a/src/ServiceToolkit/src/SIL.ServiceToolkit/Services/ParallelCorpusPreprocessingService.cs
+++ b/src/ServiceToolkit/src/SIL.ServiceToolkit/Services/ParallelCorpusPreprocessingService.cs
@@ -101,7 +101,11 @@ public async Task Preprocess(
                 IParallelTextCorpus parallelKeyTermsCorpus = sourceTermCorpora
                     .ChooseRandom(Seed)
                     .AlignRows(targetTermCorpora.ChooseFirst());
-                foreach (ParallelTextRow row in parallelKeyTermsCorpus)
+                // Train on each distinct (source, target) key-term pair once. The tuple key compares the two texts
+                // separately, so pairs like ("ab", "c") and ("a", "bc") are not merged as concatenation would do.
+                foreach (
+                    ParallelTextRow row in parallelKeyTermsCorpus.DistinctBy(row => (row.SourceText, row.TargetText))
+                )
                 {
                     await train(new Row(row.TextId, row.Refs, row.SourceText, row.TargetText, 1));
                 }