Skip to content

Commit

Permalink
Browse files (browse the repository at this point in the history)
  • Loading branch information
Enkidu93 committed Nov 7, 2023
1 parent de12933 commit fba7aea
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 2 deletions.
2 changes: 2 additions & 0 deletions src/SIL.Machine.AspNetCore/Models/Corpus.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ public class Corpus
public string Id { get; set; } = default!;
public string SourceLanguage { get; set; } = default!;
public string TargetLanguage { get; set; } = default!;
public bool TrainOnAll { get; set; }
public bool PretranslateAll { get; set; }
public HashSet<string> TrainOnTextIds { get; set; } = default!;
public HashSet<string> PretranslateTextIds { get; set; } = default!;
public List<CorpusFile> SourceFiles { get; set; } = default!;
public List<CorpusFile> TargetFiles { get; set; } = default!;
Expand Down
7 changes: 5 additions & 2 deletions src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,11 @@ async IAsyncEnumerable<Pretranslation> ProcessRowsAsync()

foreach (ParallelTextRow row in parallelCorpus)
{
await sourceTrainWriter.WriteAsync($"{row.SourceText}\n");
await targetTrainWriter.WriteAsync($"{row.TargetText}\n");
if (corpus.TrainOnAll || corpus.TrainOnTextIds.Contains(row.TextId))
{
await sourceTrainWriter.WriteAsync($"{row.SourceText}\n");
await targetTrainWriter.WriteAsync($"{row.TargetText}\n");
}
if (
(corpus.PretranslateAll || corpus.PretranslateTextIds.Contains(row.TextId))
&& row.SourceSegment.Count > 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,9 @@ private static Models.Corpus Map(Serval.Translation.V1.Corpus source)
Id = source.Id,
SourceLanguage = source.SourceLanguage,
TargetLanguage = source.TargetLanguage,
TrainOnAll = source.TrainOnAll,
PretranslateAll = source.PretranslateAll,
TrainOnTextIds = source.TrainOnTextIds.ToHashSet(),
PretranslateTextIds = source.PretranslateTextIds.ToHashSet(),
SourceFiles = source.SourceFiles.Select(Map).ToList(),
TargetFiles = source.TargetFiles.Select(Map).ToList()
Expand Down

0 comments on commit fba7aea

Please sign in to comment.