Skip to content

Commit

Permalink
Make seed optional; remove unneeded code
Browse files Browse the repository at this point in the history
  • Loading branch information
Enkidu93 committed Nov 18, 2024
1 parent 6ca6027 commit 5376918
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 10 deletions.
2 changes: 1 addition & 1 deletion src/SIL.Machine/Corpora/CorporaExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ public static INParallelTextCorpus AlignMany(
return nParallelTextCorpus;
}

public static ITextCorpus ChooseRandom(this IEnumerable<ITextCorpus> corpora, int seed)
/// <summary>
/// Wraps the given corpora in a single <c>MergedTextCorpus</c> constructed with <c>MergeRule.Random</c>.
/// </summary>
/// <param name="corpora">The corpora to merge.</param>
/// <param name="seed">
/// Optional seed forwarded unchanged to the merged corpus; defaults to <c>null</c>.
/// NOTE(review): presumably a <c>null</c> seed means non-reproducible selection - confirm against MergedTextCorpus.
/// </param>
/// <returns>The newly created merged corpus.</returns>
public static ITextCorpus ChooseRandom(this IEnumerable<ITextCorpus> corpora, int? seed = null) =>
    new MergedTextCorpus(corpora, MergeRule.Random, seed);
Expand Down
9 changes: 0 additions & 9 deletions src/SIL.Machine/Corpora/NParallelTextCorpus.cs
Original file line number Diff line number Diff line change
Expand Up @@ -155,15 +155,6 @@ private IEnumerable<NParallelTextRow> GetRows(IList<IEnumerator<TextRow>> enumer
&& minRefIndexes.Any(i => !completed[i] && currentRows[i].IsInRange) //and at least one of the min rows is not completed and in a range
)
{
if (
rangeInfo.IsInRange
&& nonMinRefIndexes.Any(i =>
!completed[i] && currentRows[i].IsInRange && !currentRows[i].IsEmpty
) //At least one of the non-min rows is not completed, is in a range, and has content
)
{
yield return rangeInfo.CreateRow();
}
foreach (int i in minRefIndexes)
rangeInfo.AddTextRow(enumerators[i].Current, i);
foreach (int i in nonMinRefIndexes)
Expand Down
49 changes: 49 additions & 0 deletions tests/SIL.Machine.Tests/Corpora/ParallelTextCorpusTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,55 @@ public void GetRows_OverlappingRanges()
Assert.That(rows[0].IsTargetSentenceStart, Is.True);
}

// Source holds one range over refs 1-3 (text carried on ref 1); target holds one range over
// refs 3-4 (text carried on ref 3) and has no rows for refs 1 and 2. The ranges overlap at
// ref 3, so the parallel corpus is expected to produce exactly one combined row.
[Test]
public void GetRows_OverlappingRangesAndMissingRow()
{
    const string textId = "text1";
    const string sourceText = "source segment 1 . source segment 2 . source segment 3 .";
    const string targetText = "target segment 3 . target segment 4 .";

    // Flags used by the first row of each range.
    var rangeStartFlags = TextRowFlags.SentenceStart | TextRowFlags.InRange | TextRowFlags.RangeStart;

    var sourceRows = new[]
    {
        TextRow(textId, 1, sourceText, rangeStartFlags),
        TextRow(textId, 2, flags: TextRowFlags.InRange),
        TextRow(textId, 3, flags: TextRowFlags.InRange)
    };
    var targetRows = new[]
    {
        TextRow(textId, 3, targetText, rangeStartFlags),
        TextRow(textId, 4, flags: TextRowFlags.InRange)
    };

    var sourceCorpus = new DictionaryTextCorpus(new MemoryText(textId, sourceRows));
    var targetCorpus = new DictionaryTextCorpus(new MemoryText(textId, targetRows));

    var parallelCorpus = new ParallelTextCorpus(sourceCorpus, targetCorpus);
    ParallelTextRow[] rows = parallelCorpus.ToArray();

    // A single row must come back, spanning every ref from both ranges.
    Assert.That(rows.Length, Is.EqualTo(1));
    ParallelTextRow mergedRow = rows[0];
    Assert.That(mergedRow.SourceRefs, Is.EqualTo(new[] { 1, 2, 3 }));
    Assert.That(mergedRow.TargetRefs, Is.EqualTo(new[] { 3, 4 }));

    // Segments are compared against the whitespace-split fixture text.
    Assert.That(mergedRow.SourceSegment, Is.EqualTo(sourceText.Split()));
    Assert.That(mergedRow.TargetSegment, Is.EqualTo(targetText.Split()));
    Assert.That(mergedRow.IsSourceSentenceStart, Is.True);
    Assert.That(mergedRow.IsTargetSentenceStart, Is.True);
}

[Test]
public void GetRows_AdjacentRangesSameText()
{
Expand Down

0 comments on commit 5376918

Please sign in to comment.