Skip to content

Commit

Permalink
a start
Browse files Browse the repository at this point in the history
  • Loading branch information
johnml1135 committed Nov 21, 2024
1 parent 9720b46 commit 7d3b138
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 25 deletions.
14 changes: 14 additions & 0 deletions src/SIL.Machine/Corpora/AlignedWordPair.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,20 @@ public static IReadOnlyCollection<AlignedWordPair> Parse(string alignments, bool
return result;
}

/// <summary>
/// Attempts to parse an alignment string without letting a parse failure escape to the caller.
/// </summary>
/// <param name="alignments">The alignment string that is handed to <c>Parse</c>.</param>
/// <param name="alignedWordPairs">
/// Receives the collection returned by <c>Parse</c> on success; <c>null</c> when parsing fails.
/// </param>
/// <returns><c>true</c> if <c>Parse</c> completed; <c>false</c> if it threw any exception.</returns>
public static bool TryParse(string alignments, out IReadOnlyCollection<AlignedWordPair> alignedWordPairs)
{
    try
    {
        alignedWordPairs = Parse(alignments);
    }
    catch
    {
        // Any failure raised by Parse is reported through the return value instead of propagating.
        alignedWordPairs = null;
        return false;
    }

    return true;
}

public AlignedWordPair(int sourceIndex, int targetIndex)
{
SourceIndex = sourceIndex;
Expand Down
26 changes: 26 additions & 0 deletions src/SIL.Machine/Translation/IWordAlignmentEngine.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;

namespace SIL.Machine.Translation
{
/// <summary>
/// A disposable word aligner that can also return a <see cref="WordAlignmentResult"/> for a pair of
/// segments, through both synchronous and asynchronous entry points.
/// </summary>
public interface IWordAlignmentEngine : IWordAligner, IDisposable
{
    // ----- Synchronous API -----

    /// <summary>
    /// Gets the best alignment for a source/target segment pair supplied as plain strings.
    /// </summary>
    /// <param name="sourceSegment">The source segment text.</param>
    /// <param name="targetSegment">The target segment text.</param>
    /// <returns>The alignment result for the pair.</returns>
    WordAlignmentResult GetBestAlignment(string sourceSegment, string targetSegment);

    /// <summary>
    /// Gets the best alignment for a source/target segment pair supplied as lists of strings.
    /// </summary>
    /// <param name="sourceSegment">The source segment as a list of strings.</param>
    /// <param name="targetSegment">The target segment as a list of strings.</param>
    /// <returns>The alignment result for the pair.</returns>
    WordAlignmentResult GetBestAlignment(IReadOnlyList<string> sourceSegment, IReadOnlyList<string> targetSegment);

    // ----- Asynchronous API -----

    /// <summary>
    /// Task-returning counterpart of <see cref="GetBestAlignment(string, string)"/>.
    /// </summary>
    /// <param name="sourceSegment">The source segment text.</param>
    /// <param name="targetSegment">The target segment text.</param>
    /// <param name="cancellationToken">Optional cancellation token; defaults to <c>default</c>.</param>
    /// <returns>A task that yields the alignment result for the pair.</returns>
    Task<WordAlignmentResult> GetBestAlignmentAsync(
        string sourceSegment,
        string targetSegment,
        CancellationToken cancellationToken = default
    );

    /// <summary>
    /// Task-returning counterpart of the list-based <c>GetBestAlignment</c> overload.
    /// </summary>
    /// <param name="sourceSegment">The source segment as a list of strings.</param>
    /// <param name="targetSegment">The target segment as a list of strings.</param>
    /// <param name="cancellationToken">Optional cancellation token; defaults to <c>default</c>.</param>
    /// <returns>A task that yields the alignment result for the pair.</returns>
    Task<WordAlignmentResult> GetBestAlignmentAsync(
        IReadOnlyList<string> sourceSegment,
        IReadOnlyList<string> targetSegment,
        CancellationToken cancellationToken = default
    );
}
}
27 changes: 2 additions & 25 deletions src/SIL.Machine/Translation/IWordAlignmentModel.cs
Original file line number Diff line number Diff line change
@@ -1,32 +1,9 @@
using System;
using System.Collections.Generic;
using SIL.Machine.Corpora;
using SIL.ObjectModel;
using SIL.Machine.Corpora;

namespace SIL.Machine.Translation
{
// NOTE(review): two declaration lines appear back to back below. This looks like a diff rendering that
// shows both the previous base list (IWordAligner, IDisposable) and the new one (IWordAlignmentEngine);
// confirm which line, and which of the members below, the actual file keeps.
public interface IWordAlignmentModel : IWordAligner, IDisposable
public interface IWordAlignmentModel : IWordAlignmentEngine
{
// Vocabulary objects exposed for the source side and the target side of the model.
IWordVocabulary SourceWords { get; }
IWordVocabulary TargetWords { get; }
// Read-only set of integer indices flagged as special symbols.
// NOTE(review): presumably indices into the vocabularies above — confirm.
IReadOnlySet<int> SpecialSymbolIndices { get; }

// Creates an ITrainer for the supplied parallel text corpus.
ITrainer CreateTrainer(IParallelTextCorpus corpus);

// Enumerate (target, score) pairs for one source word, addressed either by string or by index.
// NOTE(review): threshold defaults to 0 and is presumably a minimum-score cutoff — confirm.
IEnumerable<(string TargetWord, double Score)> GetTranslations(string sourceWord, double threshold = 0);
IEnumerable<(int TargetWordIndex, double Score)> GetTranslations(int sourceWordIndex, double threshold = 0);

// Return a single score for one source/target word pair, addressed either by string or by index.
double GetTranslationScore(string sourceWord, string targetWord);
double GetTranslationScore(int sourceWordIndex, int targetWordIndex);

// Returns a collection of aligned word pairs for the given source and target segments.
IReadOnlyCollection<AlignedWordPair> GetBestAlignedWordPairs(
IReadOnlyList<string> sourceSegment,
IReadOnlyList<string> targetSegment
);
// Takes a segment pair plus existing word pairs and returns nothing.
// NOTE(review): presumably stores the computed scores on the supplied wordPairs — confirm.
void ComputeAlignedWordPairScores(
IReadOnlyList<string> sourceSegment,
IReadOnlyList<string> targetSegment,
IReadOnlyCollection<AlignedWordPair> wordPairs
);
}
}
48 changes: 48 additions & 0 deletions src/SIL.Machine/Translation/WordAlignmentResult.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
using System;
using System.Collections.Generic;
using System.Linq;

namespace SIL.Machine.Translation
{
/// <summary>
/// Holds the outcome of aligning a source segment with a target segment: the tokens of both sides,
/// one confidence value per target token, and the alignment matrix.
/// </summary>
public class WordAlignmentResult
{
    /// <summary>
    /// Creates a result after checking that all of its parts have consistent sizes.
    /// </summary>
    /// <param name="sourceTokens">The source tokens; copied into an array.</param>
    /// <param name="targetTokens">The target tokens; copied into an array.</param>
    /// <param name="confidences">
    /// The confidence values; copied into an array. There must be exactly one per target token.
    /// </param>
    /// <param name="alignment">
    /// The alignment matrix. Its row count must equal the number of source tokens and its column count
    /// must equal the number of target tokens.
    /// </param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">
    /// The number of confidences differs from the number of target tokens, or the matrix dimensions do
    /// not match the token counts.
    /// </exception>
    public WordAlignmentResult(
        IEnumerable<string> sourceTokens,
        IEnumerable<string> targetTokens,
        IEnumerable<double> confidences,
        WordAlignmentMatrix alignment
    )
    {
        // Guard up front so a null argument is reported under its own parameter name, rather than as
        // LINQ's generic "source" parameter or as a NullReferenceException on the matrix.
        if (sourceTokens is null)
        {
            throw new ArgumentNullException(nameof(sourceTokens));
        }
        if (targetTokens is null)
        {
            throw new ArgumentNullException(nameof(targetTokens));
        }
        if (confidences is null)
        {
            throw new ArgumentNullException(nameof(confidences));
        }
        if (alignment is null)
        {
            throw new ArgumentNullException(nameof(alignment));
        }

        // Materialize each sequence once so later enumeration of the inputs cannot change the result.
        SourceTokens = sourceTokens.ToArray();
        TargetTokens = targetTokens.ToArray();
        Confidences = confidences.ToArray();
        if (Confidences.Count != TargetTokens.Count)
        {
            throw new ArgumentException(
                "The confidences must be the same length as the target segment.",
                nameof(confidences)
            );
        }

        Alignment = alignment;
        if (Alignment.RowCount != SourceTokens.Count)
        {
            throw new ArgumentException(
                "The alignment source length must be the same length as the source segment.",
                nameof(alignment)
            );
        }
        if (Alignment.ColumnCount != TargetTokens.Count)
        {
            throw new ArgumentException(
                "The alignment target length must be the same length as the target segment.",
                nameof(alignment)
            );
        }
    }

    /// <summary>The source tokens, in the order they were supplied.</summary>
    public IReadOnlyList<string> SourceTokens { get; }

    /// <summary>The target tokens, in the order they were supplied.</summary>
    public IReadOnlyList<string> TargetTokens { get; }

    /// <summary>The confidence values; the list has the same length as <see cref="TargetTokens"/>.</summary>
    public IReadOnlyList<double> Confidences { get; }

    /// <summary>
    /// The alignment matrix, with one row per source token and one column per target token.
    /// </summary>
    public WordAlignmentMatrix Alignment { get; }
}
}

0 comments on commit 7d3b138

Please sign in to comment.