Skip to content

Commit

Permalink
Preserve changes from #205.
Browse files: browse the repository at this point in the history
  • Loading branch information
johnml1135 committed Jun 4, 2024
1 parent 8cecc70 commit 4c89be1
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 29 deletions.
5 changes: 2 additions & 3 deletions src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@ ILanguageTagService languageTagService
{
private readonly ILanguageTagService _languageTagService = languageTagService;

protected override string ResolveLanguageCode(string languageCode)
protected override bool ResolveLanguageCodeForBaseModel(string languageCode, out string resolvedCode)
{
_languageTagService.ConvertToFlores200Code(languageCode, out string resolvedCode);
return resolvedCode;
return _languageTagService.ConvertToFlores200Code(languageCode, out resolvedCode);
}
}
18 changes: 14 additions & 4 deletions src/SIL.Machine.AspNetCore/Services/PreprocessBuildJob.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,19 @@ CancellationToken cancellationToken
if (engine is null)
throw new OperationCanceledException($"Engine {engineId} does not exist. Build canceled.");

buildPreprocessSummary.Add("SourceLanguageResolved", ResolveLanguageCode(engine.SourceLanguage));
buildPreprocessSummary.Add("TargetLanguageResolved", ResolveLanguageCode(engine.TargetLanguage));
bool sourceTagInBaseModel = ResolveLanguageCodeForBaseModel(engine.SourceLanguage, out string srcLang);
buildPreprocessSummary.Add("SourceLanguageResolved", srcLang);
bool targetTagInBaseModel = ResolveLanguageCodeForBaseModel(engine.TargetLanguage, out string trgLang);
buildPreprocessSummary.Add("TargetLanguageResolved", trgLang);
Logger.LogInformation("{summary}", buildPreprocessSummary.ToJsonString());

if (trainCount == 0 && (!sourceTagInBaseModel || !targetTagInBaseModel))
{
throw new InvalidOperationException(
$"Neither language code in build {buildId} are known to the base model, and the data specified for training was empty. Build canceled."
);
}

cancellationToken.ThrowIfCancellationRequested();

await using (await @lock.WriterLockAsync(cancellationToken: cancellationToken))
Expand Down Expand Up @@ -418,8 +427,9 @@ private record Row(
int RowCount
);

protected virtual string ResolveLanguageCode(string languageCode)
protected virtual bool ResolveLanguageCodeForBaseModel(string languageCode, out string resolvedCode)
{
return languageCode;
resolvedCode = languageCode;
return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,15 @@ public void RunAsync_UnknownLanguageTagsNoData()
});
}

// Runs the preprocess build job for an SMT transfer engine using a corpus whose language
// tags are "xxx" and "zzz". The test has no explicit assertion: it passes as long as
// RunBuildJobAsync completes without throwing.
// NOTE(review): presumably this succeeds because the SMT transfer job uses the base
// language-code resolver rather than the NMT one; the "engine2" setup is not visible
// here, so confirm against the test environment.
[Test]
public async Task RunAsync_UnknownLanguageTagsNoDataSmtTransfer()
{
using TestEnvironment env = new();
// Copy the default text-file corpus, overriding only the source and target language tags.
Corpus corpus1 = env.DefaultTextFileCorpus with { SourceLanguage = "xxx", TargetLanguage = "zzz" };

// Explicitly request the SMT transfer job type instead of the default engine type.
await env.RunBuildJobAsync(corpus1, engineId: "engine2", engineType: TranslationEngineType.SmtTransfer);
}

private class TestEnvironment : ObjectModel.DisposableBase
{
private static readonly string TestDataPath = Path.Combine(
Expand All @@ -242,9 +251,7 @@ private class TestEnvironment : ObjectModel.DisposableBase
public MemoryRepository<TranslationEngine> Engines { get; }
public IDistributedReaderWriterLockFactory LockFactory { get; }
public IBuildJobService BuildJobService { get; }
public ILogger<PreprocessBuildJob> Logger { get; }
public IClearMLService ClearMLService { get; }
public PreprocessBuildJob BuildJob { get; }
public IOptionsMonitor<BuildJobOptions> BuildJobOptions { get; }

public Corpus DefaultTextFileCorpus { get; }
Expand Down Expand Up @@ -426,7 +433,6 @@ public TestEnvironment()
)
.Returns(Task.FromResult("job1"));
SharedFileService = new SharedFileService(Substitute.For<ILoggerFactory>());
Logger = Substitute.For<ILogger<NmtPreprocessBuildJob>>();
BuildJobService = new BuildJobService(
[
new HangfireBuildJobRunner(
Expand All @@ -447,29 +453,58 @@ [new NmtHangfireBuildJobFactory()]
],
Engines
);
BuildJob = new PreprocessBuildJob(
PlatformService,
Engines,
LockFactory,
Logger,
BuildJobService,
SharedFileService,
CorpusService
)
}

public PreprocessBuildJob GetBuildJob(TranslationEngineType engineType)
{
switch (engineType)
{
Seed = 1234
};
case TranslationEngineType.Nmt:
{
return new NmtPreprocessBuildJob(
PlatformService,
Engines,
LockFactory,
Substitute.For<ILogger<NmtPreprocessBuildJob>>(),
BuildJobService,
SharedFileService,
CorpusService,
new LanguageTagService()
)
{
Seed = 1234
};
}
case TranslationEngineType.SmtTransfer:
{
return new PreprocessBuildJob(
PlatformService,
Engines,
LockFactory,
Substitute.For<ILogger<PreprocessBuildJob>>(),
BuildJobService,
SharedFileService,
CorpusService
)
{
Seed = 1234
};
}
default:
throw new InvalidOperationException("Unknown engine type.");
}
;
}

public Task RunBuildJobAsync(Corpus corpus, bool useKeyTerms = true, string engineId = "engine1")
public Task RunBuildJobAsync(
Corpus corpus,
bool useKeyTerms = true,
string engineId = "engine1",
TranslationEngineType engineType = TranslationEngineType.Nmt
)
{
return BuildJob.RunAsync(
engineId,
"build1",
[corpus],
useKeyTerms ? null : "{\"use_key_terms\":false}",
default
);
return GetBuildJob(engineType)
.RunAsync(engineId, "build1", [corpus], useKeyTerms ? null : "{\"use_key_terms\":false}", default);
}

public async Task<(int Source1Count, int Source2Count, int TargetCount, int TermCount)> GetTrainCountAsync()
Expand Down

0 comments on commit 4c89be1

Please sign in to comment.