diff --git a/src/SIL.Machine/Corpora/CorporaExtensions.cs b/src/SIL.Machine/Corpora/CorporaExtensions.cs index a9e2b4f9..38ad693c 100644 --- a/src/SIL.Machine/Corpora/CorporaExtensions.cs +++ b/src/SIL.Machine/Corpora/CorporaExtensions.cs @@ -245,16 +245,6 @@ public static IParallelTextCorpus AlignRows( }; } - public static NParallelTextCorpus AlignMany(this ITextCorpus[] corpora, bool[] allRowsPerCorpus = null) - { - NParallelTextCorpus nParallelTextCorpus = new NParallelTextCorpus(corpora); - if (allRowsPerCorpus != null) - { - nParallelTextCorpus.AllRowsList = allRowsPerCorpus; - } - return nParallelTextCorpus; - } - public static (ITextCorpus, ITextCorpus, int, int) Split( this ITextCorpus corpus, double? percent = null, @@ -377,16 +367,6 @@ public static ITextCorpus FilterTexts(this ITextCorpus corpus, IEnumerable GetRows(IEnumerable textIds) } } + #endregion + + #region INParallelTextCorpus operations + + public static INParallelTextCorpus AlignMany( + this IEnumerable corpora, + IEnumerable allRowsPerCorpus = null + ) + { + NParallelTextCorpus nParallelTextCorpus = new NParallelTextCorpus(corpora); + if (allRowsPerCorpus != null) + { + nParallelTextCorpus.AllRows = allRowsPerCorpus.ToArray(); + } + return nParallelTextCorpus; + } + + public static ITextCorpus ChooseRandom(this INParallelTextCorpus corpus, int seed) + { + return new MergedCorpus(corpus, MergeRule.Random, seed); + } + + public static ITextCorpus ChooseFirst(this INParallelTextCorpus corpus) + { + return new MergedCorpus(corpus, MergeRule.First, 0); + } + private enum MergeRule { - First = 1, - Random = 2 + First, + Random } private class MergedCorpus : TextCorpusBase { - private readonly NParallelTextCorpus _corpus; + private readonly INParallelTextCorpus _corpus; private readonly MergeRule _mergeRule; private readonly Random _random; - private readonly int _seed; - - public MergedCorpus(NParallelTextCorpus nParallelTextCorpus, MergeRule mergeRule, int seed) + public MergedCorpus(INParallelTextCorpus nParallelTextCorpus, MergeRule mergeRule, int seed) { _corpus = nParallelTextCorpus; _mergeRule = mergeRule; - _seed = seed; - _random = new Random(_seed); + _random = new Random(seed); } public override IEnumerable Texts => _corpus.Corpora.SelectMany(c => c.Texts); - public override bool IsTokenized => - Enumerable.Range(0, _corpus.N).Select(i => _corpus.GetIsTokenized(i)).All(b => b); + public override bool IsTokenized => Enumerable.Range(0, _corpus.N).All(i => _corpus.IsTokenized(i)); - public override ScrVers Versification => _corpus.N > 0 ? _corpus.Corpora.First().Versification : null; + public override ScrVers Versification => _corpus.N > 0 ? _corpus.Corpora[0].Versification : null; public override IEnumerable GetRows(IEnumerable textIds) { @@ -579,14 +582,14 @@ public override IEnumerable GetRows(IEnumerable textIds) { IReadOnlyList nonEmptyIndices = nRow .NSegments.Select((s, i) => (s, i)) - .Where(pair => pair.s.Count > 0 || nRow.GetIsInRange(pair.i)) + .Where(pair => pair.s.Count > 0 || nRow.IsInRange(pair.i)) .Select(pair => pair.i) .ToList(); IReadOnlyList indices = nonEmptyIndices.Count > 0 ? nonEmptyIndices : Enumerable.Range(0, nRow.N).ToList(); if (indexOfInRangeRow == -1) { - indices = indices.Where(i => nRow.GetIsRangeStart(i) || !nRow.GetIsInRange(i)).ToList(); + indices = indices.Where(i => nRow.IsRangeStart(i) || !nRow.IsInRange(i)).ToList(); } if (indices.Count == 0) continue; @@ -601,11 +604,11 @@ public override IEnumerable GetRows(IEnumerable textIds) break; } indexOfSelectedRow = indexOfInRangeRow != -1 ? indexOfInRangeRow : indexOfSelectedRow; - if (!nRow.GetIsInRange(indexOfSelectedRow)) + if (!nRow.IsInRange(indexOfSelectedRow)) { indexOfInRangeRow = -1; } - if (nRow.GetIsRangeStart(indexOfSelectedRow)) + if (nRow.IsRangeStart(indexOfSelectedRow)) { indexOfInRangeRow = indexOfSelectedRow; } diff --git a/src/SIL.Machine/Corpora/INParallelTextCorpus.cs b/src/SIL.Machine/Corpora/INParallelTextCorpus.cs index 5a1e86f7..0dfde2fa 100644 --- a/src/SIL.Machine/Corpora/INParallelTextCorpus.cs +++ b/src/SIL.Machine/Corpora/INParallelTextCorpus.cs @@ -4,6 +4,10 @@ namespace SIL.Machine.Corpora { public interface INParallelTextCorpus : ICorpus { + int N { get; } + IReadOnlyList Corpora { get; } + + bool IsTokenized(int i); int Count(bool includeEmpty = true, IEnumerable textIds = null); IEnumerable GetRows(IEnumerable textIds); diff --git a/src/SIL.Machine/Corpora/NParallelTextCorpus.cs b/src/SIL.Machine/Corpora/NParallelTextCorpus.cs index b9da9797..f5dfd119 100644 --- a/src/SIL.Machine/Corpora/NParallelTextCorpus.cs +++ b/src/SIL.Machine/Corpora/NParallelTextCorpus.cs @@ -3,7 +3,6 @@ using System.Collections.Immutable; using System.Linq; using SIL.Extensions; -using SIL.Linq; using SIL.Scripture; namespace SIL.Machine.Corpora @@ -16,62 +15,51 @@ public NParallelTextCorpus(IEnumerable corpora, IComparer r if (Corpora.Count < 1) throw new ArgumentException("There must be at least one corpora.", nameof(corpora)); RowRefComparer = rowRefComparer ?? new DefaultRowRefComparer(); - AllRowsList = new bool[Corpora.Count] + AllRows = new bool[Corpora.Count] .Select(_ => false) .ToImmutableArray(); } - public bool GetIsTokenized(int i) => + public override bool IsTokenized(int i) => i < Corpora.Count ? Corpora[i].IsTokenized : throw new ArgumentOutOfRangeException(nameof(i)); - public int N => Corpora.Count; - - public IReadOnlyList AllRowsList { get; set; } - public IReadOnlyList Corpora { get; } + public override int N => Corpora.Count; + public IReadOnlyList AllRows { get; set; } + public override IReadOnlyList Corpora { get; } public IAlignmentCorpus AlignmentCorpus { get; set; } public IComparer RowRefComparer { get; } - private static HashSet GetTextIdsFromCorpora( - IEnumerable corpora, - IEnumerable allRowsEnumerate - ) + private HashSet GetTextIdsFromCorpora() { - IReadOnlyList> textIdListOfLists = corpora - .Select(c => c.Texts.Select(t => t.Id)) - .ToImmutableArray(); - - HashSet textIds = textIdListOfLists - .Skip(1) - .Aggregate( - new HashSet(textIdListOfLists.First()), - (h, e) => - { - h.IntersectWith(e); - return h; - } - ); - allRowsEnumerate - .Select((allRows, i) => (allRows, i)) - .Where(t => t.allRows) - .ForEach(t => textIds.UnionWith(textIdListOfLists[t.i])); + HashSet textIds = new HashSet(); + HashSet allRowsTextIds = new HashSet(); + for (int i = 0; i < Corpora.Count; i++) + { + if (i == 0) + textIds.AddRange(Corpora[i].Texts.Select(t => t.Id)); + else + textIds.IntersectWith(Corpora[i].Texts.Select(t => t.Id)); + if (AllRows[i]) + allRowsTextIds.AddRange(Corpora[i].Texts.Select(t => t.Id)); + } + textIds.UnionWith(allRowsTextIds); return textIds; } public override IEnumerable GetRows(IEnumerable textIds) { - HashSet filterTextIds = GetTextIdsFromCorpora(Corpora, AllRowsList); + HashSet filterTextIds = GetTextIdsFromCorpora(); if (textIds != null) filterTextIds.IntersectWith(textIds); IEnumerator alignmentEnumerator = null; - IList> enumeratedCorpora = new List>(); - IEnumerable rows = new List() { }; + List> enumeratedCorpora = new List>(); try { for (int i = 0; i < Corpora.Count; i++) { - var enumerator = Corpora[i].GetRows(filterTextIds).GetEnumerator(); + IEnumerator enumerator = Corpora[i].GetRows(filterTextIds).GetEnumerator(); enumeratedCorpora.Add( new TextCorpusEnumerator(enumerator, Corpora[0].Versification, Corpora[i].Versification) ); @@ -79,7 +67,8 @@ public override IEnumerable GetRows(IEnumerable textId if (AlignmentCorpus != null) alignmentEnumerator = AlignmentCorpus.GetRows(filterTextIds).GetEnumerator(); - rows = GetRows(enumeratedCorpora, alignmentEnumerator).ToList(); + foreach (NParallelTextRow row in GetRows(enumeratedCorpora, alignmentEnumerator)) + yield return row; } finally { @@ -89,10 +78,9 @@ public override IEnumerable GetRows(IEnumerable textId } alignmentEnumerator?.Dispose(); } - return rows; } - private bool AllInRangeHaveSegments(IList rows) + private static bool AllInRangeHaveSegments(IList rows) { return rows.All(r => (r.IsInRange && r.Segment.Count > 0) || (!r.IsInRange)); } @@ -100,7 +88,7 @@ private bool AllInRangeHaveSegments(IList rows) private IList MinRefIndexes(IList refs) { object minRef = refs[0]; - IList minRefIndexes = new List() { 0 }; + List minRefIndexes = new List() { 0 }; for (int i = 1; i < refs.Count; i++) { if (RowRefComparer.Compare(refs[i], minRef) < 0) @@ -118,7 +106,7 @@ private IList MinRefIndexes(IList refs) } private IEnumerable GetRows( - IList> listOfEnumerators, + IList> enumerators, IEnumerator alignmentEnumerator ) { @@ -129,63 +117,65 @@ IEnumerator alignmentEnumerator RowRefComparer = RowRefComparer }; - bool[] completed = listOfEnumerators.Select(e => !e.MoveNext()).ToArray(); + bool[] completed = enumerators.Select(e => !e.MoveNext()).ToArray(); while (!completed.All(c => c)) { - IList minRefIndexes; - IList currentRows = listOfEnumerators.Select(e => e.Current).ToArray(); + List minRefIndexes; + List currentRows = enumerators.Select(e => e.Current).ToList(); try { minRefIndexes = MinRefIndexes( - currentRows - .Select(e => - { - if (e != null) - return e.Ref; - return null; - }) - .ToArray() - ); + currentRows + .Select( + (e, i) => + { + if (!completed[i]) + return e.Ref; + return null; + } + ) + .ToArray() + ) + .ToList(); } catch (ArgumentException) { throw new CorpusAlignmentException(currentRows.Select(e => e.Ref.ToString()).ToArray()); } - var currentIncompleteRows = currentRows.Where((r, i) => !completed[i]).ToArray(); - IList nonMinRefIndexes = System.Linq.Enumerable.Range(0, N).Except(minRefIndexes).ToList(); - - if ( - minRefIndexes.Count < (N - completed.Count(c => c)) - || completed.Where((c, i) => !c && minRefIndexes.Contains(i)).Count() == 1 - ) //then there are some non-min refs or only one incomplete enumerator + TextRow[] currentIncompleteRows = currentRows.Where((r, i) => !completed[i]).ToArray(); + List nonMinRefIndexes = Enumerable.Range(0, N).Except(minRefIndexes).ToList(); + int numberOfRemainingRows = N - completed.Count(c => c); + if (minRefIndexes.Count < numberOfRemainingRows || minRefIndexes.Count(i => !completed[i]) == 1) + //then there are some non-min refs or only one incomplete enumerator { - IList> minEnumerators = minRefIndexes - .Select(i => listOfEnumerators[i]) - .ToList(); - IList> nonMinEnumerators = nonMinRefIndexes - .Select(i => listOfEnumerators[i]) + List> minEnumerators = minRefIndexes.Select(i => enumerators[i]).ToList(); + List> nonMinEnumerators = nonMinRefIndexes + .Select(i => enumerators[i]) .ToList(); if ( - nonMinRefIndexes.Any(i => !AllRowsList[i]) - && minRefIndexes.Where(i => !completed[i] && listOfEnumerators[i].Current.IsInRange).Any() + nonMinRefIndexes.Any(i => !AllRows[i]) + && minRefIndexes.Any(i => !completed[i] && currentRows[i].IsInRange) ) { if ( rangeInfo.IsInRange - && nonMinEnumerators - .Where(e => e.Current != null && e.Current.IsInRange && e.Current.Segment.Count > 0) - .Any() + && nonMinEnumerators.Any(e => + e.Current != null && e.Current.IsInRange && e.Current.Segment.Count > 0 + ) ) { yield return rangeInfo.CreateRow(); } - minRefIndexes.ForEach(i => rangeInfo.AddTextRow(listOfEnumerators[i].Current, i)); + minRefIndexes.ForEach(i => rangeInfo.AddTextRow(enumerators[i].Current, i)); nonMinRefIndexes.ForEach(i => rangeInfo.Rows[i].SameRefRows.Clear()); } else { + bool anyNonMinEnumeratorsMidRange = nonMinRefIndexes.Any(i => + !completed[i] && !currentRows[i].IsRangeStart && currentRows[i].IsInRange + ); foreach ( NParallelTextRow row in CreateMinRefRows( rangeInfo, @@ -194,13 +184,10 @@ NParallelTextRow row in CreateMinRefRows( nonMinRefIndexes.ToArray(), forceInRange: minRefIndexes .Select(i => - nonMinEnumerators.All(e => - e.Current != null && e.Current.TextId == currentRows[i].TextId + anyNonMinEnumeratorsMidRange + && nonMinRefIndexes.All(j => + !completed[j] && currentRows[j].TextId == currentRows[i].TextId ) - && nonMinEnumerators - .Where(e => e.Current != null) - .Select(e => !e.Current.IsRangeStart && e.Current.IsInRange) - .Any(b => b) ) .ToList() ) @@ -211,11 +198,11 @@ NParallelTextRow row in CreateMinRefRows( } foreach (int i in minRefIndexes) { - rangeInfo.Rows[i].SameRefRows.Add(listOfEnumerators[i].Current); - completed[i] = !listOfEnumerators[i].MoveNext(); + rangeInfo.Rows[i].SameRefRows.Add(enumerators[i].Current); + completed[i] = !enumerators[i].MoveNext(); } } - else if (minRefIndexes.Count == (N - completed.Count(c => c))) + else if (minRefIndexes.Count == numberOfRemainingRows) // the refs are all the same { int compareAlignmentCorpus = -1; @@ -244,8 +231,7 @@ NParallelTextRow row in CreateMinRefRows( if ( minRefIndexes .Select(i => - listOfEnumerators[i].Current.IsInRange - && minRefIndexes.All(j => j == i || !AllRowsList[j]) + enumerators[i].Current.IsInRange && minRefIndexes.All(j => j == i || !AllRows[j]) ) .Any(b => b) ) @@ -265,7 +251,7 @@ NParallelTextRow row in CreateMinRefRows( { for (int i = 0; i < rangeInfo.Rows.Count; i++) { - for (int j = 0; j < rangeInfo.Rows.Count; j++) + for (int j = 0; j < rangeInfo.Rows.Count; j++) //TODO rework { if (i == j || completed[i] || completed[j]) continue; @@ -302,7 +288,7 @@ NParallelTextRow row in CreateRows( for (int i = 0; i < rangeInfo.Rows.Count; i++) { rangeInfo.Rows[i].SameRefRows.Add(currentRows[i]); - completed[i] = !listOfEnumerators[i].MoveNext(); + completed[i] = !enumerators[i].MoveNext(); } } else @@ -382,17 +368,17 @@ private IEnumerable CreateMinRefRows( IReadOnlyList forceInRange = null ) { - List<(IList Rows, int Index)> sameRefRowsPerIndex = nonMinRefIndexes + List<(List Rows, int Index)> sameRefRowsPerIndex = nonMinRefIndexes .Select(i => (rangeInfo.Rows[i], i)) - .Select(pair => (pair.Item1.SameRefRows, pair.Item2)) + .Select(pair => (pair.Item1.SameRefRows.ToList(), pair.Item2)) .ToList(); - List alreadyYielded = new List(); + HashSet alreadyYielded = new HashSet(); TextRow[] textRows; foreach (int i in minRefIndexes) { TextRow textRow = currentRows[i]; - foreach ((IList sameRefRows, int j) in sameRefRowsPerIndex) + foreach ((List sameRefRows, int j) in sameRefRowsPerIndex) { if (i == j) continue; @@ -416,13 +402,15 @@ NParallelTextRow row in CreateRows(rangeInfo, textRows, forceInRange: forceInRan } textRows = new TextRow[N]; var forceCurrentInRange = new bool[N]; - foreach (int i in minRefIndexes.Where(i => AllRowsList[i]).Except(alreadyYielded)) + bool rowsHaveContent = false; + foreach (int i in minRefIndexes.Where(i => AllRows[i]).Except(alreadyYielded)) { TextRow textRow = currentRows[i]; textRows[i] = textRow; forceCurrentInRange[i] = forceCurrentInRange[i]; + rowsHaveContent = true; } - if (textRows.Any(tr => tr != null)) + if (rowsHaveContent) { foreach (NParallelTextRow row in CreateRows(rangeInfo, textRows, forceCurrentInRange)) { @@ -492,8 +480,11 @@ public void AddTextRow(TextRow row, int index) public NParallelTextRow CreateRow() { object[][] refs = new object[N][]; - IList referenceRefs = Rows.Where(r => r.Refs.Count > 0).Select(r => r.Refs).FirstOrDefault(); - foreach (int i in System.Linq.Enumerable.Range(0, Rows.Count)) + List referenceRefs = Rows.Where(r => r.Refs.Count > 0) + .Select(r => r.Refs) + .FirstOrDefault() + .ToList(); + foreach (int i in Enumerable.Range(0, Rows.Count)) { var row = Rows[i]; diff --git a/src/SIL.Machine/Corpora/NParallelTextCorpusBase.cs b/src/SIL.Machine/Corpora/NParallelTextCorpusBase.cs index 5487b200..73ccf56f 100644 --- a/src/SIL.Machine/Corpora/NParallelTextCorpusBase.cs +++ b/src/SIL.Machine/Corpora/NParallelTextCorpusBase.cs @@ -6,6 +6,12 @@ namespace SIL.Machine.Corpora { public abstract class NParallelTextCorpusBase : INParallelTextCorpus { + public abstract int N { get; } + + public abstract IReadOnlyList Corpora { get; } + + public abstract bool IsTokenized(int i); + int ICorpus.Count(bool includeEmpty) { return Count(includeEmpty, null); diff --git a/src/SIL.Machine/Corpora/NParallelTextRow.cs b/src/SIL.Machine/Corpora/NParallelTextRow.cs index fd60d8d9..e76c57d9 100644 --- a/src/SIL.Machine/Corpora/NParallelTextRow.cs +++ b/src/SIL.Machine/Corpora/NParallelTextRow.cs @@ -33,18 +33,18 @@ public NParallelTextRow(string textId, IEnumerable> nRefs) public IReadOnlyList> NSegments { get; set; } public IReadOnlyList NFlags { get; set; } - public bool GetIsSentenceStart(int i) => + public bool IsSentenceStart(int i) => NFlags.Count > i ? NFlags[i].HasFlag(TextRowFlags.SentenceStart) : throw new ArgumentOutOfRangeException(); - public bool GetIsInRange(int i) => + public bool IsInRange(int i) => NFlags.Count > i ? NFlags[i].HasFlag(TextRowFlags.InRange) : throw new ArgumentOutOfRangeException(); - public bool GetIsRangeStart(int i) => + public bool IsRangeStart(int i) => NFlags.Count > i ? NFlags[i].HasFlag(TextRowFlags.RangeStart) : throw new ArgumentOutOfRangeException(); public bool IsEmpty => NSegments.All(s => s.Count == 0); - public string GetText(int i) => string.Join(" ", NSegments[i]); + public string Text(int i) => string.Join(" ", NSegments[i]); public IReadOnlyCollection AlignedWordPairs { get; set; } diff --git a/src/SIL.Machine/Corpora/ParallelTextCorpus.cs b/src/SIL.Machine/Corpora/ParallelTextCorpus.cs index 8e79b545..e1b64281 100644 --- a/src/SIL.Machine/Corpora/ParallelTextCorpus.cs +++ b/src/SIL.Machine/Corpora/ParallelTextCorpus.cs @@ -29,14 +29,14 @@ public ParallelTextCorpus( public ITextCorpus SourceCorpus { get; } public ITextCorpus TargetCorpus { get; } - - public NParallelTextCorpus NParallelTextCorpus { get; set; } public IAlignmentCorpus AlignmentCorpus { get; } public IComparer RowRefComparer { get; } + private NParallelTextCorpus NParallelTextCorpus { get; set; } + public override IEnumerable GetRows(IEnumerable textIds) { - NParallelTextCorpus.AllRowsList = new bool[] { AllSourceRows, AllTargetRows }; + NParallelTextCorpus.AllRows = new bool[] { AllSourceRows, AllTargetRows }; bool isScripture = SourceCorpus.IsScripture() && TargetCorpus.IsScripture(); foreach (var nRow in NParallelTextCorpus.GetRows(textIds)) { diff --git a/src/SIL.Machine/Corpora/TextCorpusEnumerator.cs b/src/SIL.Machine/Corpora/TextCorpusEnumerator.cs index 592bfcc6..7653a135 100644 --- a/src/SIL.Machine/Corpora/TextCorpusEnumerator.cs +++ b/src/SIL.Machine/Corpora/TextCorpusEnumerator.cs @@ -6,7 +6,7 @@ namespace SIL.Machine.Corpora { - public class TextCorpusEnumerator : DisposableBase, IEnumerator + internal class TextCorpusEnumerator : DisposableBase, IEnumerator { private readonly IEnumerator _enumerator; private readonly bool _isScripture = false; diff --git a/tests/SIL.Machine.Tests/Corpora/CorporaExtensionsTests.cs b/tests/SIL.Machine.Tests/Corpora/CorporaExtensionsTests.cs index 708b4ffa..2f8ec3a5 100644 --- a/tests/SIL.Machine.Tests/Corpora/CorporaExtensionsTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/CorporaExtensionsTests.cs @@ -94,7 +94,7 @@ public void MergedCorpus_SelectFirst() } ) ); - var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) { AllRowsList = [true, true, true] }; + var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) { AllRows = [true, true, true] }; var mergedCorpus = nParallelCorpus.ChooseFirst(); var rows = mergedCorpus.ToArray(); Assert.That(rows, Has.Length.EqualTo(3), JsonSerializer.Serialize(rows)); @@ -139,7 +139,7 @@ public void MergedCorpus_SelectRandom_Seed123456() } ) ); - var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) { AllRowsList = [true, true, true] }; + var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) { AllRows = [true, true, true] }; var mergedCorpus = nParallelCorpus.ChooseRandom(123456); var rows = mergedCorpus.ToArray(); Assert.That(rows, Has.Length.EqualTo(3), JsonSerializer.Serialize(rows)); @@ -187,7 +187,7 @@ public void MergedCorpus_SelectRandom_Seed4501() } ) ); - var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) { AllRowsList = [true, true, true] }; + var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) { AllRows = [true, true, true] }; var mergedCorpus = nParallelCorpus.ChooseRandom(4501); var rows = mergedCorpus.ToArray(); Assert.That(rows, Has.Length.EqualTo(3), JsonSerializer.Serialize(rows)); diff --git a/tests/SIL.Machine.Tests/Corpora/NParallelTextCorpusTests.cs b/tests/SIL.Machine.Tests/Corpora/NParallelTextCorpusTests.cs index ee3a9150..68dc9f90 100644 --- a/tests/SIL.Machine.Tests/Corpora/NParallelTextCorpusTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/NParallelTextCorpusTests.cs @@ -47,12 +47,12 @@ public void GetRows_ThreeCorpora() Assert.That(rows.Length, Is.EqualTo(3)); Assert.That(rows[0].NRefs.All(r => (int)r[0] == 1)); Assert.That(rows[0].NSegments.All(r => r.SequenceEqual("source segment 1 .".Split()))); - Assert.That(rows[0].GetIsSentenceStart(0), Is.False); - Assert.That(rows[0].GetIsSentenceStart(1), Is.True); + Assert.That(rows[0].IsSentenceStart(0), Is.False); + Assert.That(rows[0].IsSentenceStart(1), Is.True); Assert.That(rows[2].NRefs.All(r => (int)r[0] == 3)); Assert.That(rows[2].NSegments.All(r => r.SequenceEqual("source segment 3 .".Split()))); - Assert.That(rows[2].GetIsSentenceStart(1), Is.False); - Assert.That(rows[2].GetIsSentenceStart(2), Is.True); + Assert.That(rows[2].IsSentenceStart(1), Is.False); + Assert.That(rows[2].IsSentenceStart(2), Is.True); } [Test] @@ -86,8 +86,8 @@ public void GetRows_ThreeCorpora_MissingRows() Assert.That(rows.Length, Is.EqualTo(1)); Assert.That(rows[0].NRefs.All(r => (int)r[0] == 3)); Assert.That(rows[0].NSegments.All(r => r.SequenceEqual("source segment 3 .".Split()))); - Assert.That(rows[0].GetIsSentenceStart(0), Is.True); - Assert.That(rows[0].GetIsSentenceStart(1), Is.False); + Assert.That(rows[0].IsSentenceStart(0), Is.True); + Assert.That(rows[0].IsSentenceStart(1), Is.False); } [Test] @@ -116,13 +116,13 @@ public void GetRows_ThreeCorpora_MissingRows_AllAllRows() var corpus3 = new DictionaryTextCorpus( new MemoryText("text1", new[] { TextRow("text1", 3, "source segment 3 .") }) ); - var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) { AllRowsList = [true, true, true] }; + var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) { AllRows = [true, true, true] }; NParallelTextRow[] rows = nParallelCorpus.ToArray(); Assert.That(rows.Length, Is.EqualTo(3)); Assert.That(rows[2].NRefs.All(r => (int)r[0] == 3)); Assert.That(rows[2].NSegments.All(r => r.SequenceEqual("source segment 3 .".Split()))); - Assert.That(rows[2].GetIsSentenceStart(0), Is.True); - Assert.That(rows[2].GetIsSentenceStart(1), Is.False); + Assert.That(rows[2].IsSentenceStart(0), Is.True); + Assert.That(rows[2].IsSentenceStart(1), Is.False); } [Test] @@ -151,16 +151,13 @@ public void GetRows_ThreeCorpora_MissingRows_SomeAllRows() var corpus3 = new DictionaryTextCorpus( new MemoryText("text1", new[] { TextRow("text1", 3, "source segment 3 .") }) ); - var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) - { - AllRowsList = [true, false, true] - }; + var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) { AllRows = [true, false, true] }; NParallelTextRow[] rows = nParallelCorpus.ToArray(); Assert.That(rows.Length, Is.EqualTo(2)); Assert.That(rows[1].NRefs.All(r => (int)r[0] == 3)); Assert.That(rows[1].NSegments.All(r => r.SequenceEqual("source segment 3 .".Split()))); - Assert.That(rows[1].GetIsSentenceStart(0), Is.True); - Assert.That(rows[1].GetIsSentenceStart(1), Is.False); + Assert.That(rows[1].IsSentenceStart(0), Is.True); + Assert.That(rows[1].IsSentenceStart(1), Is.False); } [Test] @@ -198,12 +195,12 @@ public void GetRows_ThreeCorpora_MissingRows_AllAllRows_MissingMiddle() } ) ); - var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) { AllRowsList = [true, true, true] }; + var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) { AllRows = [true, true, true] }; NParallelTextRow[] rows = nParallelCorpus.ToArray(); Assert.That(rows.Length, Is.EqualTo(3)); Assert.That(rows[1].NRefs.All(r => r.Count == 0 || (int)r[0] == 2)); Assert.That(rows[1].NSegments.All(r => r.Count == 0 || r.SequenceEqual("source segment 2 .".Split()))); - Assert.That(rows[1].GetIsSentenceStart(1), Is.True); + Assert.That(rows[1].IsSentenceStart(1), Is.True); } [Test] @@ -226,15 +223,12 @@ public void GetRows_ThreeCorpora_MissingRows_MissingLastRows() var corpus3 = new DictionaryTextCorpus( new MemoryText("text1", new[] { TextRow("text1", 1, "source segment 1 .") }) ); - var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) - { - AllRowsList = [true, false, false] - }; + var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) { AllRows = [true, false, false] }; NParallelTextRow[] rows = nParallelCorpus.ToArray(); Assert.That(rows.Length, Is.EqualTo(3)); Assert.That(rows[1].NRefs.All(r => r.Count == 0 || (int)r[0] == 2)); Assert.That(rows[1].NSegments.All(r => r.Count == 0 || r.SequenceEqual("source segment 2 .".Split()))); - Assert.That(rows[1].GetIsSentenceStart(0), Is.True); + Assert.That(rows[1].IsSentenceStart(0), Is.True); } [Test] @@ -250,12 +244,12 @@ public void GetRows_OneCorpus() } ) ); - var nParallelCorpus = new NParallelTextCorpus([corpus1]) { AllRowsList = [true] }; + var nParallelCorpus = new NParallelTextCorpus([corpus1]) { AllRows = [true] }; NParallelTextRow[] rows = nParallelCorpus.ToArray(); Assert.That(rows.Length, Is.EqualTo(2)); Assert.That(rows[0].NRefs.All(r => (int)r[0] == 1)); Assert.That(rows[0].NSegments.All(r => r.SequenceEqual("source segment 1 .".Split()))); - Assert.That(rows[0].GetIsSentenceStart(0), Is.False); + Assert.That(rows[0].IsSentenceStart(0), Is.False); } [Test] @@ -406,10 +400,7 @@ public void GetRows_ThreeCorpora_OverlappingRanges_AllIndividualRows() } ) ); - var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) - { - AllRowsList = [false, false, true] - }; + var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) { AllRows = [false, false, true] }; NParallelTextRow[] rows = nParallelCorpus.ToArray(); Assert.That(rows.Length, Is.EqualTo(3), JsonSerializer.Serialize(rows)); Assert.That(rows[0].NRefs[0], Is.EquivalentTo(new object[] { 1 })); @@ -461,10 +452,7 @@ public void GetRows_ThreeCorpora_OverlappingRanges_AllRangeOneThroughTwoRows() } ) ); - var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) - { - AllRowsList = [false, true, false] - }; + var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) { AllRows = [false, true, false] }; NParallelTextRow[] rows = nParallelCorpus.ToArray(); Assert.That(rows.Length, Is.EqualTo(2), JsonSerializer.Serialize(rows)); Assert.That(rows[0].NRefs[0], Is.EquivalentTo(new object[] { 1, 2 })); @@ -516,10 +504,7 @@ public void GetRows_ThreeCorpora_OverlappingRanges_AllRangeTwoThroughThreeRows() } ) ); - var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) - { - AllRowsList = [true, false, false] - }; + var nParallelCorpus = new NParallelTextCorpus([corpus1, corpus2, corpus3]) { AllRows = [true, false, false] }; NParallelTextRow[] rows = nParallelCorpus.ToArray(); Assert.That(rows.Length, Is.EqualTo(2), JsonSerializer.Serialize(rows)); Assert.That(rows[0].NRefs[0], Is.EquivalentTo(new object[] { 1 }));