From fd763543ae7f82d88ca732c5bbcabd7e63c9440b Mon Sep 17 00:00:00 2001 From: John Lambert Date: Wed, 23 Oct 2024 17:09:10 -0400 Subject: [PATCH] More broken. --- .../Corpora/NParallelTextCorpus.cs | 82 +++++++++++++++++-- 1 file changed, 74 insertions(+), 8 deletions(-) diff --git a/src/SIL.Machine/Corpora/NParallelTextCorpus.cs b/src/SIL.Machine/Corpora/NParallelTextCorpus.cs index 4f461fc0..0530d0cb 100644 --- a/src/SIL.Machine/Corpora/NParallelTextCorpus.cs +++ b/src/SIL.Machine/Corpora/NParallelTextCorpus.cs @@ -149,7 +149,39 @@ private IEnumerable GetRows(IList> listOf .Select(i => listOfEnumerators[i]) .ToList(); - if (!allNonMinRows.Any() && minEnumerators.Select(e => e.Current.IsInRange).Any()) { } + if (!allNonMinRows.Any() && minEnumerators.Select(e => e.Current.IsInRange).Any()) + { + if ( + rangeInfo.IsInRange + && nonMinEnumerators + .Select(e => e.Current.IsInRange && e.Current.Segment.Count > 0) + .Any() + ) + { + yield return rangeInfo.CreateRow(); + } + minRefIndexes.ForEach(i => rangeInfo.AddTextRow(listOfEnumerators[i].Current, i)); + nonMinRefIndexes.ForEach(i => rangeInfo.Rows[i].SameRefRows.Clear()); + } + else + { + foreach ( + NParallelTextRow row in CreateMinRefRows( + rangeInfo, + minEnumerators.Select(e => e.Current).ToList(), + nonMinEnumerators.Select(e => e.Current).ToList(), + allNonMinRows + ) + ) + { + yield return row; + } + foreach (int i in nonMinRefIndexes) + { + rangeInfo.Rows[i].SameRefRows.Add(listOfEnumerators[i].Current); + listOfEnumerators[i].MoveNext(); + } + } // source is less than target if (!AllTargetRows && srcEnumerator.Current.IsInRange) { @@ -566,14 +598,17 @@ private bool CheckSameRefRows(List sameRefRows, TextRow otherRow) return sameRefRows.Count > 0; } - private IEnumerable CreateNRows( + private IEnumerable CreateMinRefRows( NRangeInfo rangeInfo, - TextRow sourceRow, - int index, - List targetSameRefRows, - bool forceTargetInRange = false + IList currentRows, + IList minRefIndexes, + IList nonMinRefIndexes, + bool forceInRange = false ) { + IList minRows = minRefIndexes.Select(i => currentRows[i]).ToList(); + IList nonMinRows = nonMinRefIndexes.Select(i => currentRows[i]).ToList(); + if (CheckSameRefRows(targetSameRefRows, sourceRow)) { foreach (TextRow targetSameRefRow in targetSameRefRows) @@ -631,8 +666,9 @@ NParallelTextRow row in CreateRows( private class RangeRow { - public List Refs { get; } = new List(); - public List Segment { get; } = new List(); + public IList Refs { get; } = new List(); + public IList Segment { get; } = new List(); + public IList SameRefRows { get; } = new List(); public bool IsSentenceStart { get; set; } = false; public bool IsInRange => Refs.Count > 0; public bool IsEmpty => Segment.Count == 0; @@ -643,9 +679,39 @@ private class NRangeInfo public int N = -1; public string TextId { get; set; } = ""; public ScrVers Versification { get; set; } = null; + public IComparer RowRefComparer { get; set; } = null; public List Rows { get; } = new List(); public bool IsInRange => Rows.Any(r => r.IsInRange); + private bool CheckSameRefRows(List sameRefRows, TextRow otherRow) + { + try + { + if (sameRefRows.Count > 0 && RowRefComparer.Compare(sameRefRows[0].Ref, otherRow.Ref) != 0) + sameRefRows.Clear(); + } + catch (ArgumentException) + { + throw new CorpusAlignmentException(sameRefRows[0].Ref.ToString(), otherRow.Ref.ToString()); + } + return sameRefRows.Count > 0; + } + + public void AddTextRow(TextRow row, int index) + { + if (N <= row.Segment.Count) + { + throw new ArgumentOutOfRangeException( + $"There are only {N} parallel texts, but text {index} was chosen." + ); + } + TextId = row.TextId; + Rows[index].Refs.Add(row.Ref); + if (Rows[index].IsEmpty) + Rows[index].IsSentenceStart = row.IsSentenceStart; + Rows[index].Segment.AddRange(row.Segment); + } + public NParallelTextRow CreateRow() { object[] refs = new object[0];