Skip to content

Commit

Permalink
Deduplicate kbts
Browse files (browse the repository at this point in the history)
  • Loading branch information
Enkidu93 committed Dec 2, 2024
1 parent 03e4716 commit 7b9c4a8
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ public async Task RunAsync_EnableKeyTerms()
Assert.That(src1Count, Is.EqualTo(14));
Assert.That(src2Count, Is.EqualTo(0));
Assert.That(trgCount, Is.EqualTo(1));
- Assert.That(termCount, Is.EqualTo(166));
+ Assert.That(termCount, Is.EqualTo(144));
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,9 @@ public async Task Preprocess(
IParallelTextCorpus parallelKeyTermsCorpus = sourceTermCorpora
.ChooseRandom(Seed)
.AlignRows(targetTermCorpora.ChooseFirst());
- foreach (ParallelTextRow row in parallelKeyTermsCorpus)
+ foreach (
+ ParallelTextRow row in parallelKeyTermsCorpus.DistinctBy(row => row.SourceText + row.TargetText)
+ )
{
await train(new Row(row.TextId, row.Refs, row.SourceText, row.TargetText, 1));
}
Expand Down

0 comments on commit 7b9c4a8

Please sign in to comment.