Skip to content

Commit

Permalink
Reviewer changes
Browse files Browse the repository at this point in the history
  • Loading branch information
Enkidu93 committed Nov 13, 2024
1 parent e07cf64 commit 54ae315
Show file tree
Hide file tree
Showing 2 changed files with 149 additions and 153 deletions.
300 changes: 148 additions & 152 deletions src/SIL.Machine/Corpora/NParallelTextCorpus.cs
Original file line number Diff line number Diff line change
Expand Up @@ -110,198 +110,194 @@ private IEnumerable<NParallelTextRow> GetRows(
IEnumerator<AlignmentRow> alignmentEnumerator
)
{
var rangeInfo = new NRangeInfo(N)
{
var rangeInfo = new NRangeInfo(N)
{
Versifications = Corpora.Select(c => c.Versification).ToArray(),
RowRefComparer = RowRefComparer
};
Versifications = Corpora.Select(c => c.Versification).ToArray(),
RowRefComparer = RowRefComparer
};

bool[] completed = enumerators.Select(e => !e.MoveNext()).ToArray();
bool[] completed = enumerators.Select(e => !e.MoveNext()).ToArray();

while (!completed.All(c => c))
while (!completed.All(c => c))
{
List<int> minRefIndexes;
List<TextRow> currentRows = enumerators.Select(e => e.Current).ToList();
try
{
minRefIndexes = MinRefIndexes(
currentRows
.Select(
(e, i) =>
{
if (!completed[i])
return e.Ref;
return null;
}
)
.ToArray()
)
.ToList();
}
catch (ArgumentException)
{
List<int> minRefIndexes;
List<TextRow> currentRows = enumerators.Select(e => e.Current).ToList();
try
throw new CorpusAlignmentException(currentRows.Select(e => e.Ref.ToString()).ToArray());
}
TextRow[] currentIncompleteRows = currentRows.Where((r, i) => !completed[i]).ToArray();
List<int> nonMinRefIndexes = Enumerable.Range(0, N).Except(minRefIndexes).ToList();
int numberOfRemainingRows = N - completed.Count(c => c);
if (minRefIndexes.Count < numberOfRemainingRows || minRefIndexes.Count(i => !completed[i]) == 1)
//then there are some non-min refs or only one incomplete enumerator
{
List<IEnumerator<TextRow>> minEnumerators = minRefIndexes.Select(i => enumerators[i]).ToList();
List<IEnumerator<TextRow>> nonMinEnumerators = nonMinRefIndexes
.Select(i => enumerators[i])
.ToList();

if (
nonMinRefIndexes.Any(i => !AllRows[i])
&& minRefIndexes.Any(i => !completed[i] && currentRows[i].IsInRange)
)
{
minRefIndexes = MinRefIndexes(
currentRows
.Select(
(e, i) =>
{
if (!completed[i])
return e.Ref;
return null;
}
if (
rangeInfo.IsInRange
&& nonMinEnumerators.Any(e =>
e.Current != null && e.Current.IsInRange && e.Current.Segment.Count > 0
)
)
{
yield return rangeInfo.CreateRow();
}
minRefIndexes.ForEach(i => rangeInfo.AddTextRow(enumerators[i].Current, i));
nonMinRefIndexes.ForEach(i => rangeInfo.Rows[i].SameRefRows.Clear());
}
else
{
bool anyNonMinEnumeratorsMidRange = nonMinRefIndexes.Any(i =>
!completed[i] && !currentRows[i].IsRangeStart && currentRows[i].IsInRange
);
foreach (
NParallelTextRow row in CreateMinRefRows(
rangeInfo,
currentRows.ToArray(),
minRefIndexes.ToArray(),
nonMinRefIndexes.ToArray(),
forceInRange: minRefIndexes
.Select(i =>
anyNonMinEnumeratorsMidRange
&& nonMinRefIndexes.All(j =>
!completed[j] && currentRows[j].TextId == currentRows[i].TextId
)
)
.ToArray()
.ToList()
)
.ToList();
)
{
yield return row;
}
}
catch (ArgumentException)
foreach (int i in minRefIndexes)
{
throw new CorpusAlignmentException(currentRows.Select(e => e.Ref.ToString()).ToArray());
rangeInfo.Rows[i].SameRefRows.Add(enumerators[i].Current);
completed[i] = !enumerators[i].MoveNext();
}
TextRow[] currentIncompleteRows = currentRows.Where((r, i) => !completed[i]).ToArray();
List<int> nonMinRefIndexes = Enumerable.Range(0, N).Except(minRefIndexes).ToList();
int numberOfRemainingRows = N - completed.Count(c => c);
if (minRefIndexes.Count < numberOfRemainingRows || minRefIndexes.Count(i => !completed[i]) == 1)
//then there are some non-min refs or only one incomplete enumerator
}
else if (minRefIndexes.Count == numberOfRemainingRows)
// the refs are all the same
{
int compareAlignmentCorpus = -1;
if (AlignmentCorpus != null)
{
List<IEnumerator<TextRow>> minEnumerators = minRefIndexes.Select(i => enumerators[i]).ToList();
List<IEnumerator<TextRow>> nonMinEnumerators = nonMinRefIndexes
.Select(i => enumerators[i])
.ToList();

if (
nonMinRefIndexes.Any(i => !AllRows[i])
&& minRefIndexes.Any(i => !completed[i] && currentRows[i].IsInRange)
)
do
{
if (
rangeInfo.IsInRange
&& nonMinEnumerators.Any(e =>
e.Current != null && e.Current.IsInRange && e.Current.Segment.Count > 0
)
)
try
{
yield return rangeInfo.CreateRow();
compareAlignmentCorpus = alignmentEnumerator.MoveNext()
? RowRefComparer.Compare(
currentIncompleteRows[0].Ref,
alignmentEnumerator.Current.Ref
)
: 1;
}
minRefIndexes.ForEach(i => rangeInfo.AddTextRow(enumerators[i].Current, i));
nonMinRefIndexes.ForEach(i => rangeInfo.Rows[i].SameRefRows.Clear());
}
else
{
bool anyNonMinEnumeratorsMidRange = nonMinRefIndexes.Any(i =>
!completed[i] && !currentRows[i].IsRangeStart && currentRows[i].IsInRange
);
foreach (
NParallelTextRow row in CreateMinRefRows(
rangeInfo,
currentRows.ToArray(),
minRefIndexes.ToArray(),
nonMinRefIndexes.ToArray(),
forceInRange: minRefIndexes
.Select(i =>
anyNonMinEnumeratorsMidRange
&& nonMinRefIndexes.All(j =>
!completed[j] && currentRows[j].TextId == currentRows[i].TextId
)
)
.ToList()
)
)
catch (ArgumentException)
{
yield return row;
throw new CorpusAlignmentException(currentRows.Select(e => e.Ref.ToString()).ToArray());
}
}
foreach (int i in minRefIndexes)
{
rangeInfo.Rows[i].SameRefRows.Add(enumerators[i].Current);
completed[i] = !enumerators[i].MoveNext();
}
} while (compareAlignmentCorpus < 0);
}
else if (minRefIndexes.Count == numberOfRemainingRows)
// the refs are all the same

if (
minRefIndexes
.Select(i =>
enumerators[i].Current.IsInRange && minRefIndexes.All(j => j == i || !AllRows[j])
)
.Any(b => b)
)
{
int compareAlignmentCorpus = -1;
if (AlignmentCorpus != null)
if (rangeInfo.IsInRange && AllInRangeHaveSegments(currentIncompleteRows))
{
do
{
try
{
compareAlignmentCorpus = alignmentEnumerator.MoveNext()
? RowRefComparer.Compare(
currentIncompleteRows[0].Ref,
alignmentEnumerator.Current.Ref
)
: 1;
}
catch (ArgumentException)
{
throw new CorpusAlignmentException(
currentRows.Select(e => e.Ref.ToString()).ToArray()
);
}
} while (compareAlignmentCorpus < 0);
yield return rangeInfo.CreateRow();
}

if (
minRefIndexes
.Select(i =>
enumerators[i].Current.IsInRange && minRefIndexes.All(j => j == i || !AllRows[j])
)
.Any(b => b)
)
for (int i = 0; i < rangeInfo.Rows.Count; i++)
{
if (rangeInfo.IsInRange && AllInRangeHaveSegments(currentIncompleteRows))
{
yield return rangeInfo.CreateRow();
}

for (int i = 0; i < rangeInfo.Rows.Count; i++)
{
rangeInfo.AddTextRow(currentRows[i], i);
rangeInfo.Rows[i].SameRefRows.Clear();
}
rangeInfo.AddTextRow(currentRows[i], i);
rangeInfo.Rows[i].SameRefRows.Clear();
}
else
}
else
{
for (int i = 0; i < rangeInfo.Rows.Count; i++)
{
for (int i = 0; i < rangeInfo.Rows.Count; i++)
for (int j = 0; j < rangeInfo.Rows.Count; j++) //TODO rework
{
for (int j = 0; j < rangeInfo.Rows.Count; j++) //TODO rework
{
if (i == j || completed[i] || completed[j])
continue;
if (i == j || completed[i] || completed[j])
continue;

if (rangeInfo.CheckSameRefRows(rangeInfo.Rows[i].SameRefRows, currentRows[j]))
if (rangeInfo.CheckSameRefRows(rangeInfo.Rows[i].SameRefRows, currentRows[j]))
{
foreach (TextRow tr in rangeInfo.Rows[i].SameRefRows)
{
foreach (TextRow tr in rangeInfo.Rows[i].SameRefRows)
var textRows = new TextRow[N];
textRows[i] = tr;
textRows[j] = currentRows[j];
foreach (NParallelTextRow r in CreateRows(rangeInfo, textRows))
{
var textRows = new TextRow[N];
textRows[i] = tr;
textRows[j] = currentRows[j];
foreach (NParallelTextRow r in CreateRows(rangeInfo, textRows))
{
yield return r;
}
yield return r;
}
}
}
}
foreach (
NParallelTextRow row in CreateRows(
rangeInfo,
currentRows.Select((r, i) => completed[i] ? null : r).ToArray(),
alignedWordPairs: AlignmentCorpus != null && compareAlignmentCorpus == 0
? alignmentEnumerator.Current.AlignedWordPairs.ToArray()
: null
)
)
{
yield return row;
}
}

for (int i = 0; i < rangeInfo.Rows.Count; i++)
foreach (
NParallelTextRow row in CreateRows(
rangeInfo,
currentRows.Select((r, i) => completed[i] ? null : r).ToArray(),
alignedWordPairs: AlignmentCorpus != null && compareAlignmentCorpus == 0
? alignmentEnumerator.Current.AlignedWordPairs.ToArray()
: null
)
)
{
rangeInfo.Rows[i].SameRefRows.Add(currentRows[i]);
completed[i] = !enumerators[i].MoveNext();
yield return row;
}
}
else

for (int i = 0; i < rangeInfo.Rows.Count; i++)
{
throw new CorpusAlignmentException(
minRefIndexes.Select(i => currentRows[i].Ref.ToString()).ToArray()
);
rangeInfo.Rows[i].SameRefRows.Add(currentRows[i]);
completed[i] = !enumerators[i].MoveNext();
}
}

if (rangeInfo.IsInRange)
yield return rangeInfo.CreateRow();
else
{
throw new CorpusAlignmentException(
minRefIndexes.Select(i => currentRows[i].Ref.ToString()).ToArray()
);
}
}

if (rangeInfo.IsInRange)
yield return rangeInfo.CreateRow();
}

private object[] CorrectVersification(object[] refs, int i)
Expand Down
2 changes: 1 addition & 1 deletion src/SIL.Machine/Corpora/ParallelTextCorpus.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public ParallelTextCorpus(
public IAlignmentCorpus AlignmentCorpus { get; }
public IComparer<object> RowRefComparer { get; }

private NParallelTextCorpus NParallelTextCorpus { get; set; }
public NParallelTextCorpus NParallelTextCorpus { get; }

public override IEnumerable<ParallelTextRow> GetRows(IEnumerable<string> textIds)
{
Expand Down

0 comments on commit 54ae315

Please sign in to comment.