diff --git a/src/SIL.Machine/Corpora/AlignedWordPair.cs b/src/SIL.Machine/Corpora/AlignedWordPair.cs index cf25108c..534b87f8 100644 --- a/src/SIL.Machine/Corpora/AlignedWordPair.cs +++ b/src/SIL.Machine/Corpora/AlignedWordPair.cs @@ -23,6 +23,20 @@ public static IReadOnlyCollection Parse(string alignments, bool return result; } + public static bool TryParse(string alignments, out IReadOnlyCollection alignedWordPairs) + { + alignedWordPairs = null; + try + { + alignedWordPairs = Parse(alignments); + return true; + } + catch + { + return false; + } + } + public AlignedWordPair(int sourceIndex, int targetIndex) { SourceIndex = sourceIndex; diff --git a/src/SIL.Machine/Translation/IWordAlignmentEngine.cs b/src/SIL.Machine/Translation/IWordAlignmentEngine.cs new file mode 100644 index 00000000..f830b1b7 --- /dev/null +++ b/src/SIL.Machine/Translation/IWordAlignmentEngine.cs @@ -0,0 +1,26 @@ +using System; +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; + +namespace SIL.Machine.Translation +{ + public interface IWordAlignmentEngine : IWordAligner, IDisposable + { + Task GetBestAlignmentAsync( + string sourceSegment, + string targetSegment, + CancellationToken cancellationToken = default + ); + + Task GetBestAlignmentAsync( + IReadOnlyList sourceSegment, + IReadOnlyList targetSegment, + CancellationToken cancellationToken = default + ); + + WordAlignmentResult GetBestAlignment(string sourceSegment, string targetSegment); + + WordAlignmentResult GetBestAlignment(IReadOnlyList sourceSegment, IReadOnlyList targetSegment); + } +} diff --git a/src/SIL.Machine/Translation/IWordAlignmentModel.cs b/src/SIL.Machine/Translation/IWordAlignmentModel.cs index 483f47d3..7fa07505 100644 --- a/src/SIL.Machine/Translation/IWordAlignmentModel.cs +++ b/src/SIL.Machine/Translation/IWordAlignmentModel.cs @@ -1,32 +1,9 @@ -using System; -using System.Collections.Generic; -using SIL.Machine.Corpora; -using SIL.ObjectModel; +using SIL.Machine.Corpora; namespace SIL.Machine.Translation { - public interface IWordAlignmentModel : IWordAligner, IDisposable + public interface IWordAlignmentModel : IWordAlignmentEngine { - IWordVocabulary SourceWords { get; } - IWordVocabulary TargetWords { get; } - IReadOnlySet SpecialSymbolIndices { get; } - ITrainer CreateTrainer(IParallelTextCorpus corpus); - - IEnumerable<(string TargetWord, double Score)> GetTranslations(string sourceWord, double threshold = 0); - IEnumerable<(int TargetWordIndex, double Score)> GetTranslations(int sourceWordIndex, double threshold = 0); - - double GetTranslationScore(string sourceWord, string targetWord); - double GetTranslationScore(int sourceWordIndex, int targetWordIndex); - - IReadOnlyCollection GetBestAlignedWordPairs( - IReadOnlyList sourceSegment, - IReadOnlyList targetSegment - ); - void ComputeAlignedWordPairScores( - IReadOnlyList sourceSegment, - IReadOnlyList targetSegment, - IReadOnlyCollection wordPairs - ); } } diff --git a/src/SIL.Machine/Translation/WordAlignmentResult.cs b/src/SIL.Machine/Translation/WordAlignmentResult.cs new file mode 100644 index 00000000..01f7fb72 --- /dev/null +++ b/src/SIL.Machine/Translation/WordAlignmentResult.cs @@ -0,0 +1,48 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace SIL.Machine.Translation +{ + public class WordAlignmentResult + { + public WordAlignmentResult( + IEnumerable sourceTokens, + IEnumerable targetTokens, + IEnumerable confidences, + WordAlignmentMatrix alignment + ) + { + SourceTokens = sourceTokens.ToArray(); + TargetTokens = targetTokens.ToArray(); + Confidences = confidences.ToArray(); + if (Confidences.Count != TargetTokens.Count) + { + throw new ArgumentException( + "The confidences must be the same length as the target segment.", + nameof(confidences) + ); + } + Alignment = alignment; + if (Alignment.RowCount != SourceTokens.Count) + { + throw new ArgumentException( + "The alignment source length must be the same length as the source segment.", + nameof(alignment) + ); + } + if (Alignment.ColumnCount != TargetTokens.Count) + { + throw new ArgumentException( + "The alignment target length must be the same length as the target segment.", + nameof(alignment) + ); + } + } + + public IReadOnlyList SourceTokens { get; } + public IReadOnlyList TargetTokens { get; } + public IReadOnlyList Confidences { get; } + public WordAlignmentMatrix Alignment { get; } + } +}