Skip to content

Commit

Permalink
More fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Enkidu93 committed Nov 8, 2024
1 parent 282c473 commit bd0ec45
Show file tree
Hide file tree
Showing 5 changed files with 154 additions and 676 deletions.
49 changes: 35 additions & 14 deletions src/SIL.Machine/Corpora/CorporaExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,16 @@ public static IParallelTextCorpus AlignRows(
};
}

/// <summary>
/// Builds an <see cref="NParallelTextCorpus"/> from the given corpora.
/// </summary>
/// <param name="corpora">The corpora passed to the <see cref="NParallelTextCorpus"/> constructor.</param>
/// <param name="allRowsPerCorpus">
/// Optional value assigned to <c>AllRowsList</c> on the new corpus. When <c>null</c>, the property keeps
/// whatever value the constructor gave it. Presumably one flag per corpus, in the same order as
/// <paramref name="corpora"/> - the length is not checked here, so confirm against callers.
/// </param>
/// <returns>The newly created <see cref="NParallelTextCorpus"/>.</returns>
public static NParallelTextCorpus AlignMany(this ITextCorpus[] corpora, bool[] allRowsPerCorpus = null)
{
    var aligned = new NParallelTextCorpus(corpora);

    // Nothing to override: hand back the corpus exactly as constructed.
    if (allRowsPerCorpus == null)
        return aligned;

    aligned.AllRowsList = allRowsPerCorpus;
    return aligned;
}

public static (ITextCorpus, ITextCorpus, int, int) Split(
this ITextCorpus corpus,
double? percent = null,
Expand Down Expand Up @@ -564,35 +574,46 @@ public MergedCorpus(NParallelTextCorpus nParallelTextCorpus, MergeRule mergeRule

// Enumerates the rows of the wrapped n-parallel corpus (_corpus) and yields TextRow objects built from
// a single corpus's segment and flags, chosen according to _mergeRule.
//
// NOTE(review): this span is a rendered diff whose +/- markers were stripped, so superseded lines and
// their replacements appear side by side. It is not compilable as shown; confirm which lines survive
// against the repository before relying on it.
public override IEnumerable<TextRow> GetRows(IEnumerable<string> textIds)
{
// Index selected at the start of a range and reused while later rows remain in that range;
// -1 means no range is currently being carried over (see the updates near the end of the loop).
int indexOfInRangeRow = -1;
foreach (NParallelTextRow nRow in _corpus.GetRows(textIds))
{
// Rows with no corpora or flagged empty produce no output.
if (nRow.N == 0 || nRow.IsEmpty)
continue;
// Candidate corpus indices for this row.
// NOTE(review): two consecutive Where filters follow; the first looks like the pre-change line and
// the second (which additionally keeps in-range indices) its replacement - confirm.
IReadOnlyList<int> nonEmptyIndices = nRow
.NSegments.Select((s, i) => (s, i))
.Where(pair => pair.s.Count > 0)
.Where(pair => pair.s.Count > 0 || nRow.GetIsInRange(pair.i))
.Select(pair => pair.i)
.ToList();
// Fall back to every index when the filter above leaves no candidates.
IReadOnlyList<int> indices =
nonEmptyIndices.Count > 0 ? nonEmptyIndices : Enumerable.Range(0, nRow.N).ToList();
// With no range carried over, keep only indices that start a range or are not in a range at all.
if (indexOfInRangeRow == -1)
{
indices = indices.Where(i => nRow.GetIsRangeStart(i) || !nRow.GetIsInRange(i)).ToList();
}
if (indices.Count == 0)
continue;
int indexOfSelectedRow = -1;
switch (_mergeRule)
{
case MergeRule.First:
// NOTE(review): this yield looks like the pre-change body of the case; the assignment after it
// appears to be its replacement - confirm.
yield return new TextRow(nRow.TextId, nRow.NRefs[indices.First()])
{
Segment = nRow.NSegments[indices.First()],
Flags = nRow.NFlags[indices.First()]
};
indexOfSelectedRow = indices.First();
break;
case MergeRule.Random:
// NOTE(review): the next six lines look like the pre-change body. There, `i` is a position within
// `indices` but is used directly as an index into NRefs/NSegments/NFlags, whereas the assignment
// that follows maps the random position through `indices` - confirm.
int i = _random.Next(0, indices.Count);
yield return new TextRow(nRow.TextId, nRow.NRefs[i])
{
Segment = nRow.NSegments[i],
Flags = nRow.NFlags[i]
};
indexOfSelectedRow = indices[_random.Next(0, indices.Count)];
break;
}
// A carried-over range index takes precedence over the index selected for this row.
indexOfSelectedRow = indexOfInRangeRow != -1 ? indexOfInRangeRow : indexOfSelectedRow;
// Stop carrying the index once the selected corpus is no longer in a range...
if (!nRow.GetIsInRange(indexOfSelectedRow))
{
indexOfInRangeRow = -1;
}
// ...and start (or restart) carrying it when the selected corpus begins a range on this row.
if (nRow.GetIsRangeStart(indexOfSelectedRow))
{
indexOfInRangeRow = indexOfSelectedRow;
}
// The emitted row uses the n-row's own Ref together with the selected corpus's segment and flags.
yield return new TextRow(nRow.TextId, nRow.Ref)
{
Segment = nRow.NSegments[indexOfSelectedRow],
Flags = nRow.NFlags[indexOfSelectedRow]
};
}
}
}
Expand Down
30 changes: 17 additions & 13 deletions src/SIL.Machine/Corpora/NParallelTextCorpus.cs
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,10 @@ IEnumerator<AlignmentRow> alignmentEnumerator
var currentIncompleteRows = currentRows.Where((r, i) => !completed[i]).ToArray();
IList<int> nonMinRefIndexes = System.Linq.Enumerable.Range(0, N).Except(minRefIndexes).ToList();

if (minRefIndexes.Count < (N - completed.Count(c => c)) || completed.Where(c => !c).Count() == 1) //then there are some non-min refs or only one incomplete enumerator
if (
minRefIndexes.Count < (N - completed.Count(c => c))
|| completed.Where((c, i) => !c && minRefIndexes.Contains(i)).Count() == 1
) //then there are some non-min refs or only one incomplete enumerator
{
IList<IEnumerator<TextRow>> minEnumerators = minRefIndexes
.Select(i => listOfEnumerators[i])
Expand Down Expand Up @@ -285,7 +288,7 @@ NParallelTextRow row in CreateMinRefRows(
foreach (
NParallelTextRow row in CreateRows(
rangeInfo,
currentIncompleteRows,
currentRows.Select((r, i) => completed[i] ? null : r).ToArray(),
alignedWordPairs: AlignmentCorpus != null && compareAlignmentCorpus == 0
? alignmentEnumerator.Current.AlignedWordPairs.ToArray()
: null
Expand Down Expand Up @@ -338,29 +341,30 @@ private IEnumerable<NParallelTextRow> CreateRows(
if (rows.All(r => r == null))
throw new ArgumentNullException("A corpus row must be specified.");

object[] refRefs = new object[] { rows.Select(r => r?.Ref).First() };
object[] defaultRefs = new object[] { };
if (rows.Any(r => r != null))
defaultRefs = new object[] { rows.Where(r => r != null).Select(r => r.Ref).First() };
string textId = null;
IList<object[]> refs = new List<object[]>();
IList<TextRowFlags> flags = new List<TextRowFlags>();
object[][] refs = new object[N][];
TextRowFlags[] flags = new TextRowFlags[N];
for (int i = 0; i < rows.Count; i++)
{
if (rows[i] != null)
{
textId = textId ?? rows[i]?.TextId;
refs.Add(
CorrectVersification(rows[i].Ref == null ? new object[] { } : new object[] { rows[i].Ref }, i)
);
flags.Add(rows[i].Flags);
refs[i] = CorrectVersification(rows[i].Ref == null ? defaultRefs : new object[] { rows[i].Ref }, i);
flags[i] = rows[i].Flags;
}
else
{
if (Corpora[i].IsScripture())
refs.Add(CorrectVersification(refRefs, i));
refs[i] = CorrectVersification(defaultRefs, i);
else
refs.Add(new object[] { });
flags.Add(forceInRange != null && forceInRange[i] ? TextRowFlags.InRange : TextRowFlags.None);
refs[i] = new object[] { };
flags[i] = forceInRange != null && forceInRange[i] ? TextRowFlags.InRange : TextRowFlags.None;
}
}
refs = refs.Select(r => r ?? (new object[] { })).ToArray();

yield return new NParallelTextRow(textId, refs)
{
Expand Down Expand Up @@ -524,7 +528,7 @@ public NParallelTextRow CreateRow()
}
}

private class DefaultRowRefComparer : IComparer<object>
public class DefaultRowRefComparer : IComparer<object>
{
public int Compare(object x, object y)
{
Expand Down
4 changes: 2 additions & 2 deletions src/SIL.Machine/Corpora/NParallelTextRow.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ public NParallelTextRow(string textId, IEnumerable<IReadOnlyList<object>> nRefs)
if (string.IsNullOrEmpty(textId))
throw new ArgumentNullException(nameof(textId));

if (nRefs.SelectMany(r => r).Count() == 0)
throw new ArgumentNullException("Either a source or target ref must be provided.");
if (nRefs == null || nRefs.Where(r => r != null).SelectMany(r => r).Count() == 0)
throw new ArgumentNullException($"Refs must be provided but nRefs={nRefs}");

TextId = textId;
NRefs = nRefs.ToList().ToReadOnlyList();
Expand Down
Loading

0 comments on commit bd0ec45

Please sign in to comment.