diff --git a/.vscode/settings.json b/.vscode/settings.json index 4c5aadb3..cbe0a073 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -28,6 +28,7 @@ "ptcc", "Rebinder", "stylesheet", + "timespan", "upserted", "USFM" ], diff --git a/README.md b/README.md index 4e2b2880..326d20ab 100644 --- a/README.md +++ b/README.md @@ -60,15 +60,13 @@ There are 3 different environments that Serval is deployed to: - Run `kubectl config use-context dallas-rke` - First, startup the storage (using internal qa for example) - `helm install serval-pvc deploy/serval-pvc -n nlp -f deploy/qa-int-values.yaml` -- Then, startup the database (give it 60 seconds) -- `helm install mongo deploy/mongo -n nlp -f deploy/qa-int-values.yaml` - Now you can turn on Serval - `helm install serval deploy/serval -n nlp -f deploy/qa-int-values.yaml` ### To update the cluster - To upgrade Serval: - For QA internal Run: - - `kubectl config use-context dallas-rke` + - `kubectl config use-context dallas-stage` - `helm upgrade serval deploy/serval -n nlp -f deploy/qa-int-values.yaml` - For QA external Run: - `kubectl config use-context dallas-rke` diff --git a/deploy/mongo/Chart.yaml b/deploy/mongo/Chart.yaml deleted file mode 100644 index e7a63115..00000000 --- a/deploy/mongo/Chart.yaml +++ /dev/null @@ -1,8 +0,0 @@ -name: mongo-repl -description: A mongo deployment to support serval -version: 0.0.1 -apiVersion: v1 -keywords: - - mongo -sources: -home: diff --git a/deploy/mongo/templates/mongo-deployment.yaml b/deploy/mongo/templates/mongo-deployment.yaml deleted file mode 100644 index 8ae37d93..00000000 --- a/deploy/mongo/templates/mongo-deployment.yaml +++ /dev/null @@ -1,44 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - app: mongo - name: mongo -spec: - replicas: 1 - selector: - matchLabels: - app: mongo - strategy: - type: Recreate - template: - metadata: - labels: - app: mongo - spec: - terminationGracePeriodSeconds: 30 - containers: - - command: ["/bin/sh", "-c"] - 
args: ['mongod --replSet myRS --bind_ip 0.0.0.0 & sleep 15s; mongosh --host localhost:27017 --eval '' config = { "_id" : "myRS", "members" : [{"_id" : 0,"host" : "mongo:27017"}] }; rs.initiate(config, { force: true }); '' ; sleep infinity'] - image: mongo:6.0 - imagePullPolicy: "Always" - name: mongo - ports: - - containerPort: 27017 - resources: - limits: - memory: "2000Mi" - cpu: "1000m" - requests: - memory: "2000Mi" - cpu: "1000m" - volumeMounts: - - mountPath: /data/db - name: mongo-data - hostname: mongo - restartPolicy: Always - volumes: - - name: mongo-data - persistentVolumeClaim: - claimName: serval-mongo-claim -status: {} diff --git a/deploy/mongo/templates/mongo-service.yaml b/deploy/mongo/templates/mongo-service.yaml deleted file mode 100644 index f787c84e..00000000 --- a/deploy/mongo/templates/mongo-service.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - labels: - app: mongo - name: mongo -spec: - ports: - - name: "27017" - port: 27017 - targetPort: 27017 - selector: - app: mongo -status: - loadBalancer: {} diff --git a/deploy/qa-ext-values.yaml b/deploy/qa-ext-values.yaml index bca7463f..7106e030 100644 --- a/deploy/qa-ext-values.yaml +++ b/deploy/qa-ext-values.yaml @@ -1,6 +1,6 @@ externalHost: qa.serval-api.org environment: Production -deploymentVersion: '1.7.QA0' +deploymentVersion: '1.7.QA7' alertEmail: ext-qa-serval-alerts@languagetechnology.org emailsToAlert: john_lambert@sil.org enableTls: true @@ -8,8 +8,8 @@ namespace: serval auth0Domain: dev-sillsdev.auth0.com lokiTenent: serval-tenant lokiUrl: http://loki-distributed-gateway.loki.svc.cluster.local -servalImage: ghcr.io/sillsdev/serval:1.7.0 -ClearMLDockerImage: ghcr.io/sillsdev/machine.py:1.6.3 +servalImage: ghcr.io/sillsdev/serval:1.7.7 +ClearMLDockerImage: ghcr.io/sillsdev/machine.py:1.7.2 ClearMLQueue: production MongoConnectionPrefix: qa_ SharedFileLocation: s3://silnlp/ext-qa/ diff --git a/deploy/qa-int-values.yaml b/deploy/qa-int-values.yaml index 
3a520728..e047f4a7 100644 --- a/deploy/qa-int-values.yaml +++ b/deploy/qa-int-values.yaml @@ -13,6 +13,6 @@ ClearMLDockerImage: ghcr.io/sillsdev/machine.py:1.6.3 ClearMLQueue: lambert_24gb MongoConnectionPrefix: qa_int_ SharedFileLocation: s3://silnlp/int-qa/ -servalClaimSize: 1Gi -machineClaimSize: 2Gi +servalClaimSize: 5Gi +machineClaimSize: 20Gi enableEcho: true \ No newline at end of file diff --git a/deploy/serval-pvc/templates/persistent-volume-claims.yaml b/deploy/serval-pvc/templates/persistent-volume-claims.yaml index 5acc3718..c4f1a8d5 100644 --- a/deploy/serval-pvc/templates/persistent-volume-claims.yaml +++ b/deploy/serval-pvc/templates/persistent-volume-claims.yaml @@ -35,17 +35,4 @@ spec: - ReadWriteMany resources: requests: - storage: 50M ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: serval-mongo-claim - namespace: {{ .Values.namespace}} -spec: - storageClassName: "longhorn" - accessModes: - - ReadWriteMany - resources: - requests: - storage: 10Gi \ No newline at end of file + storage: 55M \ No newline at end of file diff --git a/deploy/serval/templates/fluentd-flows.yaml b/deploy/serval/templates/fluentd-flows.yaml index 84db700e..2d9729bc 100644 --- a/deploy/serval/templates/fluentd-flows.yaml +++ b/deploy/serval/templates/fluentd-flows.yaml @@ -26,21 +26,3 @@ spec: - echo hosts: [] labels: {} ---- -apiVersion: logging.banzaicloud.io/v1beta1 -kind: Flow -metadata: - name: mongo-flow - namespace: {{ .Values.namespace }} -spec: - globalOutputRefs: [] - localOutputRefs: - - {{ .Values.namespace }}-loki-output - match: - - select: - container_names: - - mongo - hosts: [] - labels: {} -status: - active: true diff --git a/samples/ApiExample/ApiExample.csproj b/samples/ApiExample/ApiExample.csproj new file mode 100644 index 00000000..9d56d539 --- /dev/null +++ b/samples/ApiExample/ApiExample.csproj @@ -0,0 +1,28 @@ + + + + Exe + net8.0 + enable + enable + 4d0606c3-0fc7-4d76-b43b-236485004e81 + + + + + PreserveNewest + + + 
PreserveNewest + + + + + + + + + + + + + diff --git a/samples/ApiExample/ApiExample.sln b/samples/ApiExample/ApiExample.sln new file mode 100644 index 00000000..dbdd4696 --- /dev/null +++ b/samples/ApiExample/ApiExample.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.11.35327.3 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ApiExample", "ApiExample.csproj", "{F80F8853-776B-4C3A-B789-B8FD5820150A}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {F80F8853-776B-4C3A-B789-B8FD5820150A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {F80F8853-776B-4C3A-B789-B8FD5820150A}.Debug|Any CPU.Build.0 = Debug|Any CPU + {F80F8853-776B-4C3A-B789-B8FD5820150A}.Release|Any CPU.ActiveCfg = Release|Any CPU + {F80F8853-776B-4C3A-B789-B8FD5820150A}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {72D18D80-E951-41EE-8A1F-97B2B72615AD} + EndGlobalSection +EndGlobal diff --git a/samples/ApiExample/Program.cs b/samples/ApiExample/Program.cs new file mode 100644 index 00000000..00dd0830 --- /dev/null +++ b/samples/ApiExample/Program.cs @@ -0,0 +1,365 @@
+using System.IO.Compression;
+using ApiExample;
+using IdentityModel.Client;
+using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.DependencyInjection;
+using Newtonsoft.Json.Linq;
+using Serval.Client;
+
+// Setup and get the services.
+// GetRequiredService fails with a descriptive error if a client was not registered, instead of returning null.
+// The provider is disposed asynchronously once the top-level statements have finished.
+await using ServiceProvider services = SetupServices();
+IDataFilesClient dataFilesClient = services.GetRequiredService<IDataFilesClient>();
+ICorporaClient corporaClient = services.GetRequiredService<ICorporaClient>();
+ITranslationEnginesClient translationEnginesClient = services.GetRequiredService<ITranslationEnginesClient>();
+
+// Trap Ctrl+C cancellation
+var cancellationTokenSource = new CancellationTokenSource();
+Console.CancelKeyPress += (_, eventArgs) =>
+{
+    Console.WriteLine("Cancelling...");
+    cancellationTokenSource.Cancel();
+
+    // Stop Ctrl+C from terminating the process, so that the created entities can still be cleaned up
+    eventArgs.Cancel = true;
+};
+
+// Create then tear down a pre-translation (NMT) engine
+await CreatePreTranslationEngineAsync(cancellationTokenSource.Token);
+
+// Exit
+return;
+
+// Builds the service provider: binds the "Serval" configuration section, registers an HTTP client that
+// authenticates with OAuth client credentials, and registers the three Serval API clients used above.
+static ServiceProvider SetupServices()
+{
+    const string HttpClientName = "serval-api";
+    const string TokenClientName = "serval-api-token";
+
+    // appsettings.json must exist (optional: false); user secrets are added after it
+    IConfiguration configuration = new ConfigurationBuilder()
+        .AddJsonFile("appsettings.json", optional: false, reloadOnChange: true)
+        .AddUserSecrets<Program>()
+        .Build();
+    ServalOptions servalOptions =
+        configuration.GetSection("Serval").Get<ServalOptions>()
+        ?? throw new InvalidOperationException("The \"Serval\" configuration section is missing.");
+    if (string.IsNullOrWhiteSpace(servalOptions.ClientId) || string.IsNullOrWhiteSpace(servalOptions.ClientSecret))
+    {
+        // Fail early with guidance on how to configure the credentials
+        throw new InvalidOperationException(
+            "Serval:ClientId and Serval:ClientSecret must be configured via dotnet user-secrets."
+        );
+    }
+
+    var services = new ServiceCollection();
+    services.AddDistributedMemoryCache();
+    services
+        .AddClientCredentialsTokenManagement()
+        .AddClient(
+            TokenClientName,
+            client =>
+            {
+                client.TokenEndpoint = servalOptions.TokenUrl;
+                client.ClientId = servalOptions.ClientId;
+                client.ClientSecret = servalOptions.ClientSecret;
+                client.Parameters = new Parameters { { "audience", servalOptions.Audience } };
+            }
+        );
+    services.AddClientCredentialsHttpClient(
+        HttpClientName,
+        TokenClientName,
+        configureClient: client => client.BaseAddress = new Uri(servalOptions.ApiServer)
+    );
+    services.AddHttpClient(HttpClientName).SetHandlerLifetime(TimeSpan.FromMinutes(5));
+
+    // Instantiate each Serval client with the named HTTP client
+    services.AddSingleton<ITranslationEnginesClient>(sp => new TranslationEnginesClient(CreateHttpClient(sp)));
+    services.AddSingleton<IDataFilesClient>(sp => new DataFilesClient(CreateHttpClient(sp)));
+    services.AddSingleton<ICorporaClient>(sp => new CorporaClient(CreateHttpClient(sp)));
+    return services.BuildServiceProvider();
+
+    // Creates the named HTTP client from the registered factory
+    HttpClient CreateHttpClient(IServiceProvider sp) =>
+        sp.GetRequiredService<IHttpClientFactory>().CreateClient(HttpClientName);
+}
+
+// Runs the example end to end: uploads both Paratext projects, creates the corpora, the NMT engine and its
+// parallel corpus, runs a short build and prints the pre-translated USFM for LAO. Everything that was
+// created is removed afterwards, including when the run fails or is cancelled.
+async Task CreatePreTranslationEngineAsync(CancellationToken cancellationToken)
+{
+    // How long to wait for the build to finish, and how often to poll its progress
+    TimeSpan buildTimeout = TimeSpan.FromMinutes(30);
+    TimeSpan pollInterval = TimeSpan.FromSeconds(20);
+
+    string? sourceDataFileId = null;
+    string? targetDataFileId = null;
+    string? sourceCorpusId = null;
+    string? targetCorpusId = null;
+    string? parallelCorpusId = null;
+    string? translationEngineId = null;
+
+    // True while a started build has not been seen to finish; clean up only cancels a build in that case
+    bool buildInProgress = false;
+
+    try
+    {
+        // 1a. Create the source data file
+        Console.WriteLine("Create a source data file");
+        sourceDataFileId = await UploadParatextProjectAsync("TEA", cancellationToken);
+
+        // 1b. Create the target data file
+        Console.WriteLine("Create a target data file");
+        targetDataFileId = await UploadParatextProjectAsync("TMA", cancellationToken);
+
+        // 2a. Create the source corpus
+        // NOTE: The text id for the source and target corpora must match
+        Console.WriteLine("Create the source corpus");
+        const string SourceLanguageCode = "en";
+        var corpusConfig = new CorpusConfig
+        {
+            Name = "English Source Corpus",
+            Files = [new CorpusFileConfig { FileId = sourceDataFileId, TextId = "TestData" }],
+            Language = SourceLanguageCode,
+        };
+        Corpus translationCorpus = await corporaClient.CreateAsync(corpusConfig, cancellationToken);
+        sourceCorpusId = translationCorpus.Id;
+
+        // 2b. Create the target corpus
+        Console.WriteLine("Create the target corpus");
+        const string TargetLanguageCode = "mi";
+        corpusConfig = new CorpusConfig
+        {
+            Name = "Maori Target Corpus",
+            Files = [new CorpusFileConfig { FileId = targetDataFileId, TextId = "TestData" }],
+            Language = TargetLanguageCode,
+        };
+        translationCorpus = await corporaClient.CreateAsync(corpusConfig, cancellationToken);
+        targetCorpusId = translationCorpus.Id;
+
+        // 3. Create the translation engine
+        Console.WriteLine("Create the translation engine");
+        var engineConfig = new TranslationEngineConfig
+        {
+            Name = "Test Engine",
+            SourceLanguage = SourceLanguageCode,
+            TargetLanguage = TargetLanguageCode,
+            Type = "nmt",
+        };
+        TranslationEngine translationEngine = await translationEnginesClient.CreateAsync(
+            engineConfig,
+            cancellationToken
+        );
+        translationEngineId = translationEngine.Id;
+
+        // 4. Create the parallel corpus
+        Console.WriteLine("Create the parallel corpus");
+        TranslationParallelCorpus parallelCorpus = await translationEnginesClient.AddParallelCorpusAsync(
+            translationEngineId,
+            new TranslationParallelCorpusConfig
+            {
+                Name = "Test Parallel Corpus",
+                SourceCorpusIds = [sourceCorpusId],
+                TargetCorpusIds = [targetCorpusId],
+            },
+            cancellationToken
+        );
+        parallelCorpusId = parallelCorpus.Id;
+
+        // 5. Start a build
+        Console.WriteLine("Start a build");
+
+        // NOTE: This build is restricted to 20 steps for speed of build
+        // The generated translation will be very, very inaccurate.
+        var options = new JObject { ["max_steps"] = 20 };
+
+        // We will train on one book, and translate two books
+        var translationBuildConfig = new TranslationBuildConfig
+        {
+            Name = "Test Build",
+            Options = options,
+            Pretranslate =
+            [
+                new PretranslateCorpusConfig
+                {
+                    ParallelCorpusId = parallelCorpusId,
+                    SourceFilters =
+                    [
+                        new ParallelCorpusFilterConfig { CorpusId = sourceCorpusId, ScriptureRange = "LAO;MAN" },
+                    ],
+                },
+            ],
+            TrainOn =
+            [
+                new TrainingCorpusConfig
+                {
+                    ParallelCorpusId = parallelCorpusId,
+                    SourceFilters =
+                    [
+                        new ParallelCorpusFilterConfig { CorpusId = sourceCorpusId, ScriptureRange = "PS2" },
+                    ],
+                    TargetFilters =
+                    [
+                        new ParallelCorpusFilterConfig { CorpusId = targetCorpusId, ScriptureRange = "PS2" },
+                    ],
+                },
+            ],
+        };
+        TranslationBuild translationBuild = await translationEnginesClient.StartBuildAsync(
+            translationEngineId,
+            translationBuildConfig,
+            cancellationToken
+        );
+        buildInProgress = true;
+
+        // Wait until the build is finished, or the time limit is reached.
+        // The cursor is only repositioned when writing to an actual console window.
+        bool canMoveCursor = !Console.IsOutputRedirected;
+        int cursorTop = canMoveCursor ? Console.CursorTop : 0;
+        DateTime deadline = DateTime.UtcNow + buildTimeout;
+        while (DateTime.UtcNow < deadline)
+        {
+            translationBuild = await translationEnginesClient.GetBuildAsync(
+                translationEngineId,
+                translationBuild.Id,
+                minRevision: null,
+                cancellationToken
+            );
+            if (translationBuild.DateFinished is not null)
+            {
+                break;
+            }
+
+            if (canMoveCursor)
+            {
+                Console.SetCursorPosition(0, cursorTop);
+            }
+
+            double percentCompleted = (translationBuild.PercentCompleted ?? 0) * 100;
+            // The trailing spaces blank out what is left of a longer previous status line
+            Console.WriteLine($"{translationBuild.State}: {percentCompleted:0.#}% completed...    ");
+
+            // Wait without blocking a thread; cancellation ends the wait immediately
+            await Task.Delay(pollInterval, cancellationToken);
+        }
+
+        if (translationBuild.DateFinished is null)
+        {
+            // There is no finished build to read a draft from; clean up will cancel the build
+            Console.WriteLine($"The build did not finish within {buildTimeout.TotalMinutes} minutes.");
+            return;
+        }
+
+        buildInProgress = false;
+
+        // Display the pre-translation USFM
+        string usfm = await translationEnginesClient.GetPretranslatedUsfmAsync(
+            translationEngineId,
+            parallelCorpusId,
+            textId: "LAO",
+            PretranslationUsfmTextOrigin.OnlyPretranslated,
+            PretranslationUsfmTemplate.Source,
+            cancellationToken
+        );
+        Console.WriteLine(usfm);
+
+        Console.WriteLine("Done!");
+    }
+    catch (OperationCanceledException)
+    {
+        // The process was cancelled via Ctrl+C.
+        // TaskCanceledException derives from OperationCanceledException, so both are handled here.
+    }
+    finally
+    {
+        // Clean up created entities, dependents first. Every step is attempted even if an earlier one
+        // fails, so that a single failure does not leave the remaining entities behind.
+        if (!string.IsNullOrWhiteSpace(translationEngineId))
+        {
+            string engineId = translationEngineId;
+            if (buildInProgress)
+            {
+                await TryCleanUpAsync(
+                    "Cancel the current build",
+                    engineId,
+                    id => translationEnginesClient.CancelBuildAsync(id, CancellationToken.None)
+                );
+            }
+
+            await TryCleanUpAsync(
+                "Delete the Parallel Corpus",
+                parallelCorpusId,
+                id => translationEnginesClient.DeleteParallelCorpusAsync(engineId, id, CancellationToken.None)
+            );
+            await TryCleanUpAsync(
+                "Delete the Translation Engine",
+                engineId,
+                id => translationEnginesClient.DeleteAsync(id, CancellationToken.None)
+            );
+        }
+
+        await TryCleanUpAsync(
+            "Delete the Target Corpus",
+            targetCorpusId,
+            id => corporaClient.DeleteAsync(id, CancellationToken.None)
+        );
+        await TryCleanUpAsync(
+            "Delete the Source Corpus",
+            sourceCorpusId,
+            id => corporaClient.DeleteAsync(id, CancellationToken.None)
+        );
+        await TryCleanUpAsync(
+            "Delete the Target Data File",
+            targetDataFileId,
+            id => dataFilesClient.DeleteAsync(id, CancellationToken.None)
+        );
+        await TryCleanUpAsync(
+            "Delete the Source Data File",
+            sourceDataFileId,
+            id => dataFilesClient.DeleteAsync(id, CancellationToken.None)
+        );
+    }
+}
+
+// Zips a Paratext project directory from the data folder in memory, uploads it to Serval as a
+// Paratext data file, and returns the identifier of the created data file.
+async Task<string> UploadParatextProjectAsync(string projectDirectory, CancellationToken cancellationToken)
+{
+    string fileName = $"{projectDirectory}.zip";
+    await using var zipStream = new MemoryStream();
+    ZipFile.CreateFromDirectory(Path.Combine("data", projectDirectory), zipStream);
+
+    // Rewind so that the upload starts from the beginning of the archive
+    zipStream.Seek(0, SeekOrigin.Begin);
+    DataFile dataFile = await dataFilesClient.CreateAsync(
+        new FileParameter(zipStream, fileName),
+        FileFormat.Paratext,
+        fileName,
+        cancellationToken
+    );
+    return dataFile.Id;
+}
+
+// Runs one clean up step for the entity with the given identifier, if that entity was created.
+// A failure is reported on the console and not rethrown, so that the remaining steps still run.
+static async Task TryCleanUpAsync(string description, string? id, Func<string, Task> cleanUpAsync)
+{
+    if (string.IsNullOrWhiteSpace(id))
+    {
+        // The entity was never created, so there is nothing to clean up
+        return;
+    }
+
+    Console.WriteLine(description);
+    try
+    {
+        await cleanUpAsync(id);
+    }
+    catch (Exception e)
+    {
+        Console.WriteLine($"{description} failed: {e.Message}");
+    }
+}
diff --git a/samples/ApiExample/README.md 
b/samples/ApiExample/README.md new file mode 100644 index 00000000..9e45acac --- /dev/null +++ b/samples/ApiExample/README.md @@ -0,0 +1,24 @@ +# Serval API Example + +This example application will generate a pre-translation USFM draft using the Serval API, and display it in the terminal window. + +## Pre-Requisites + + * .NET SDK 8.0 + * You must have a Serval Client ID and Client Secret before running this example. + +## Setup + +Before running, you must configure your Serval Client Id and Client Secret via `dotnet user-secrets`: +``` +dotnet user-secrets set "Serval:ClientId" "your_client_id_here" +dotnet user-secrets set "Serval:ClientSecret" "your_client_secret_here" +``` + +## Run + +To run this example after configuring your user secrets, execute the following command from a terminal window: + +``` +dotnet run +``` diff --git a/samples/ApiExample/ServalOptions.cs b/samples/ApiExample/ServalOptions.cs new file mode 100644 index 00000000..3148fc18 --- /dev/null +++ b/samples/ApiExample/ServalOptions.cs @@ -0,0 +1,32 @@
+namespace ApiExample;
+
+/// <summary>
+/// The Serval API options, bound from the "Serval" configuration section (appsettings.json and dotnet user-secrets).
+/// </summary>
+public record ServalOptions
+{
+    /// <summary>
+    /// Gets the base address of the Serval API server to use.
+    /// </summary>
+    public string ApiServer { get; init; } = string.Empty;
+
+    /// <summary>
+    /// Gets the JWT audience, sent as the "audience" parameter of the token request.
+    /// </summary>
+    public string Audience { get; init; } = string.Empty;
+
+    /// <summary>
+    /// Gets the JWT client identifier.
+    /// </summary>
+    public string ClientId { get; init; } = string.Empty;
+
+    /// <summary>
+    /// Gets the JWT client secret.
+    /// </summary>
+    public string ClientSecret { get; init; } = string.Empty;
+
+    /// <summary>
+    /// Gets the endpoint used to generate the JWT.
+    /// </summary>
+    public string TokenUrl { get; init; } = string.Empty;
+}
diff --git a/samples/ApiExample/appsettings.json b/samples/ApiExample/appsettings.json new file mode 100644 index 00000000..9bbb173d --- /dev/null +++ b/samples/ApiExample/appsettings.json @@ -0,0 +1,7 @@ +{ + "Serval": { + "ApiServer": "https://qa.serval-api.org", + "Audience": "https://serval-api.org/", + "TokenUrl": "https://dev-sillsdev.auth0.com/oauth/token" + } +} diff --git a/samples/ApiExample/data/TEA/84MANTEA.SFM b/samples/ApiExample/data/TEA/84MANTEA.SFM new file mode 100644 index 00000000..e3a34715 --- /dev/null +++ b/samples/ApiExample/data/TEA/84MANTEA.SFM @@ -0,0 +1,66 @@ +\id MAN - Test English Apocrypha +\h Prayer of Manasseh +\toc1 Prayer of Manasseh +\toc2 Prayer of Manasseh +\toc3 Prayer of Manasseh +\mt1 Prayer of Manasseh\f + \fr 1.0 \ft Latin adds \fq King of Judah when he was held captive in Babylon\f* +\imt Introduction +\ip This prayer for forgiveness purports to be from King Manasseh during his imprisonment (see \xt 2 Chronicles 33:19\xt*), and appears to be originally written in Greek. It is found in the eighth chapter in the Book of Odes (chapter 12 in Rahlf’s edition), and is present in the Eastern Orthodox canon. +\c 1 +\q1 +\v 1 Lord Almighty,\f + \fr 1.1 \fq Almighty \ft Codex Alexandrinus adds \fq in heaven\f* +\q2 the God of our fathers:\x - \xo 1.1 \xt 2 Chr 33:12\x* +\q1 of Abraham, and Isaac, and Jacob,\x - \xo 1.1 \xt Ex 3:15, 16; Acts 3:13\x* +\q2 and of their righteous seed; +\q1 +\v 2 Who made heaven and the earth, and\f + \fr 1.2 \fq and \ft Greek \fq with\f* all the universe\f + \fr 1.2 \fq universe \ft Or \fqa adornment\fqa*. Greek \fq cosmos\fq*\f* within; +\q1 +\v 3 Who bound the sea by the word of your command,\x - \xo 1.3 \xt Job 33:8-11; Ps 74:12\x* +\q2 who closed the abyss and sealed it by your terrible and glorious name. 
+\q1 +\v 4 Who all things shudder and tremble before, because of your power; +\q1 +\v 5 For your majesty and glory is unbearable, +\q1 and the anger of your threat towards sinners is unendurable; +\q1 +\v 6 Both immeasurable and unsearchable is the mercy of your promise;\x - \xo 1.6 \xt Rom 11:33\x* +\q1 +\v 7 For you are the Lord Most High, +\q2 tender-hearted, longsuffering, abounding in mercy,\x - \xo 1.7 \xt Ex 34:6; Ps 86:15; Joel 2:13\x* +\q3 and you repent at the time of man’s trouble.\f + \fr 1.7 \ft Latin adds \fq Lord, according to your great goodness, you have promised repentance and forgiveness to those that have sinned against you, and in your infinite mercy have appointed repentance for sinners, so that they may be saved.\f* +\q1 +\v 8 Therefore you, Lord, the God of the righteous, +\q2 has not made repentance for the righteous,\x - \xo 1.8 \xt Lk 5:32\x* +\q1 for Abraham, and Isaac, and Jacob did not sin against you, +\q2 but you made repentance for me, a sinner. +\q1 +\v 9 Therefore my sins number more than the sand of the sea, +\q2 \f + \fr 1.9 \ft Codex Alexandrinus adds \fq For\f*my transgressions are multiplied, Lord, \add they\add*\f + \fr 1.9 \ft Latin reads \fq my transgressions\f* are multiplied,\f + \fr 1.9 \fq Lord, they are multiplied, \ft Codex Alexandrinus omits.\f*\x - \xo 1.9 \xt Is 59:12 \x* +\q1 and I am not worthy to look upon and see the height of heaven, +\q2 because of the multitude of my iniquities.\f + \fr 1.9 \ft Latin adds \fq Lord I now suffer justly, I deserve the trouble I receive, I am caught in a trap.\f*\x - \xo 1.9 \xt Ezra 9:6\x* +\q1 +\v 10 I am bowed down by many iron chains,\x - \xo 1.10 \xt 2 Chr 33:11\x* +\q2 I am rejected because of my sins,\f + \fr 1.10 \fq I am rejected because of my sins, \ft Latin reads \fq so that I cannot lift up my head,\f* +\q3 and I can find\f + \fr 1.10 \fq can find \ft Greek \fqa have\f* no rest; +\q1 Therefore I have kindled your anger, +\q2 I have done evil before you,\f + \fr 1.10 
\ft Latin adds \fq I did not your will\f* +\q3 setting up abominations and abominable things.\f + \fr 1.10 \fq abominable things. \ft Greek \fqa objects of anger\fqa*. This word is often translated abominations (see \xt 2 Kings 23:13\xt*)\f*\x - \xo 1.10 \xt 2 Ki 21:2-9; 2 Chr 33:2-9\x* +\q1 +\v 11 And now I bend the knee of my heart, to pray to you for your kindness,\x - \xo 1.11 \xt Sir 17:25\x* +\q1 +\v 12 I have sinned, Lord, I have sinned, +\q2 and I acknowledge my transgressions.\f + \fr 1.12 \ft Ps 51:3\f* +\q1 +\v 13 I ask you in prayer, +\q2 forgive me, Lord, forgive me, +\q1 do not destroy me for my transgressions, +\q2 neither stay angry with me forever, storing up evil for me, +\q3 and do not\f + \fr 1.13 \fq and do not \ft Greek \fqa neither\f* condemn me to the depths of the earth.\x - \xo 1.13 \xt Ps 63:9; Ps 88:6\x* +\q1 For you are, Lord,\f + \fr 1.13 \fq Lord \ft Latin reads \fq God\f* the God of those who repent; +\q2 +\v 14 And to me you will show your goodness. +\q1 For \add though I am\add* unworthy, \add you will\add* save me according to your abounding mercy. +\q2 +\v 15 And I will praise you for all of the days of my life. +\q1 For all of the host of heaven sing your praise,\x - \xo 1.15 \xt Ps 103:21; S3Y 39\x* +\q2 and yours is the glory forever.\f + \fr 1.15 \fq forever \ft Latin reads \fq forever and ever\f* Amen.\x - \xo 1.15 \xt Rom 11:36; 16:7\x* diff --git a/samples/ApiExample/data/TEA/85PS2TEA.SFM b/samples/ApiExample/data/TEA/85PS2TEA.SFM new file mode 100644 index 00000000..fed19599 --- /dev/null +++ b/samples/ApiExample/data/TEA/85PS2TEA.SFM @@ -0,0 +1,32 @@ +\id PS2 - Test English Apocrypha +\h Psalm 151 +\toc1 Psalm 151 +\toc2 Psalm 151 +\toc3 Psalm 151 +\mt1 Psalm 151 +\imt Introduction +\ip Psalm 151 is included in some Septuagint manuscripts, and is present in the Dead Sea Scrolls (4QPs\sup a\sup*) in both Hebrew (151A) and Syraic (151B). The following is a translation of the version found in the Septuagint. 
+\c 1 +\cp 151 +\d This psalm is written by David in his own hand (although it is outside the number), after he had fought one-on-one with Goliath.\f + \fr 1.1 \fq Goliath \ft Greek \fq Goliad\f* +\q1 +\v 1 Smallest among my brothers, and the youngest in my father’s house; +\q2 I shepherded my father’s sheep.\x - \xo 1.1 \xt 1 Sam 16:11\x* +\q1 +\v 2 My hands made a harp; +\q2 my fingers fashioned a lyre.\x - \xo 1.2 \xt 1 Sam 16:23\x* +\q1 +\v 3 And who will report to my Lord? +\q2 The Lord himself, he hears.\f + \fr 1.3 \fq hears \ft Codex Sinaiticus: \fqa hears everything.\fqa*; Codex Alexandrinus: \fqa who will hear me. \f* +\q1 +\v 4 He sent his messenger\f + \fr 1.4 \fq messenger \ft Or \fqa angel\f* \add to me\add*, took me from my father’s sheep, +\q2 and anointed me with olive oil.\x - \xo 1.4 \xt 1 Sam 16:13\x* +\q1 +\v 5 My brothers were handsome and great \add indeed\add*, +\q2 but with them the Lord was not pleased.\x - \xo 1.5 \xt 1 Sam 16:10\x* +\q1 +\v 6 I came out to meet the foreigner, +\q2 and he cursed me by his idols.\x - \xo 1.6 \xt 1 Sam 17:43\x* +\q1 +\v 7 But I drew his own sword, beheaded him,\x - \xo 1.7 \xt 1 Sam 17:51\x* +\q2 and took away disgrace from Israel’s sons. 
diff --git a/samples/ApiExample/data/TEA/BookNames.xml b/samples/ApiExample/data/TEA/BookNames.xml new file mode 100644 index 00000000..833a316b --- /dev/null +++ b/samples/ApiExample/data/TEA/BookNames.xml @@ -0,0 +1,126 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/samples/ApiExample/data/TEA/C3LAOTEA.SFM b/samples/ApiExample/data/TEA/C3LAOTEA.SFM new file mode 100644 index 00000000..f5209310 --- /dev/null +++ b/samples/ApiExample/data/TEA/C3LAOTEA.SFM @@ -0,0 +1,37 @@ +\id LAO - Test English Apocrypha +\h Laodiceans +\toc1 Laodiceans +\toc2 Laodiceans +\toc3 Laodiceans +\mt1 Epistle to the Laodiceans +\imt Introduction +\ip The following is a translation of the J.B. Lightfoot’s reverse translation of the surviving Latin translation of the Epistle to the Laodiceans into Koine Greek. This translation, published in his commentary on Colossians and Philemon (new edition, 1879) is based on the premise that the original epistle is a composition of quotations from the Pauline Epistles, compiled by an unknown author, purporting to be a letter from Paul to the church at Laodicea. +\c 1 +\po +\v 1 Paul, an apostle—not from men nor through man, but through Jesus Christ,\x - \xo 1.1 \xt Gal 1:1\x* to the brothers who are in Laodicea.\x - \xo 1.1 \xt Col 4:16\x* +\v 2 Grace to you and peace from God the\f + \fr 1.2 \fq the \ft Some manuscripts \fq our\f* Father and the Lord Jesus Christ.\x - \xo 1.2 \xt Gal 1:3; Phil 1:2 \x* +\p +\v 3 I give thanks to Christ in all my prayers,\x - \xo 1.3 \xt Phil 1:3\x* that you are continuing in him and persevering in his works, eagerly awaiting the promise \add of salvation\add*\x - \xo 1.3 \xt Gal 5:5\x* in the day of judgment.\x - \xo 1.3 \xt 2 Pet 2:9; 3:7; cf. 
Phil 2:16\x* +\p +\v 4 Neither do the vain discussions of certain men\x - \xo 1.4 \xt 1 Tim 1:6\x* deceive you, with their aim to turn you away\x - \xo 1.4 \xt 2 Tim 4:4\x* from the truth of the gospel\x - \xo 1.4 \xt Col 1:5; Gal 2:5, 14\x* which is preached by me.\x - \xo 1.4 \xt Gal 1:11 (cf. Gal 1:8)\x* +\v 5 So\f + \fr 1.5 \fq So \ft Greek: \fqa And \f* now God will work in those who are \add imitators\add*\x - \xo 1.5 \xt 1 Thes 2:14\x* of me\f + \fr 1.5 \fq imitators of me \ft Greek \fqa of mine\f* to advance the truth of the gospel,\x - \xo 1.5 \xt Phil 1:12\x* […]\f + \fr 1.5 \fq […] \ft A section appears to be missing, according to J.B. Lightfoot. \f* worshipping and practicing generosity—works of salvation [and]\f + \fr 1.5 \fq [and] \ft It is doubtful that this word was in the original Greek.\f* of eternal life. +\v 6 And now my imprisonment\f + \fr 1.6 \fq imprisonment \ft Greek \fqa chains\f* is widely known, which I suffer in Christ, in which I rejoice and am glad.\x - \xo 1.6 \xt Matt 5:12 cf. Phil 1:18\x* +\v 7 And this is for my eternal salvation, which will occur through your prayers, and the help of the Holy Spirit,\x - \xo 1.7 \xt Phil 1:19\x* whether by life or by death.\x - \xo 1.7 \xt Phil 1:20\x* +\v 8 For to me, to live is Christ, and to die is joy.\x - \xo 1.8 \xt Phil 1:21\x* +\v 9 And so he will work in you according to his mercy, that you may have the same love, and be in full accord.\x - \xo 1.9 \xt Phil 2:2\x* +\v 10 Therefore beloved, as you have obeyed in my presence,\x - \xo 1.10 \xt Phil 2:12\x* so work, remembering\x - \xo 1.10 \xt 2 Thes 2:5 (Vulgate)\x* the fear of God,\f + \fr 1.10 \fq God \ft J.B. 
Lightfoot’s Greek text has \fqa Lord\fqa*, but this is not present in any Latin manuscripts.\f* and it will be to you eternal life,\f + \fr 1.10 \fq life, \ft The Latin and Greek text end the sentence here.\f* +\v 11 for it is God who works in you.\x - \xo 1.11 \xt Phil 2:13\x* +\v 12 And do without grumbling,\x - \xo 1.12 \xt Phil 2:14\x* whatever you do.\x - \xo 1.12 \xt Col 3:17\x* +\p +\v 13 And finally, beloved, rejoice in Christ.\x - \xo 1.13 \xt Phil 3:1\x* Look out for those \add who are\add* greedy for dishonest gain.\x - \xo 1.13 \xt 1 Tim 3:8; Tit 1:7\x* +\v 14 Let all your requests be made known to God,\x - \xo 1.14 \xt Phil 4:6\x* and be steadfast\x - \xo 1.14 \xt 1 Cor 15:58\x* in the mind of Christ.\x - \xo 1.14 \xt 1 Cor 2:16\x* +\v 15 Whatever is sound, and true, and honourable, and just,\f + \fr 1.15 \ft Some manuscripts add \fq and pure\f* and lovely,\x - \xo 1.15 \xt Phil 4:8\x* practice these things.\x - \xo 1.15 \xt Phil 4:9\x* +\v 16 And what you have heard and received, hold in your heart, and peace will be with you. 
+\p +\v 17 [Greet the brothers.\x - \xo 1.17 \xt 1 Thes 5:26\x*]\f + \fr 1.17 \ft Most manuscripts omit verse 17.\f* +\p +\v 18 The saints greet you.\f + \fr 1.18 \ft One manuscript omits this verse.\f*\x - \xo 1.18 \xt Phil 4:22\x* +\p +\v 19 The grace of the Lord Jesus Christ\f + \fr 1.19 \ft Some manuscripts omit \fq Christ\f* be with your spirit.\x - \xo 1.19 \xt Phil 4:28\x* +\p +\v 20 And have this \add letter\add* read to the Colossians, and that of the Colossians to you.\f + \fr 1.20 \ft One manuscript adds \fq Amen.\fq*, another manuscript omits this verse.\f*\x - \xo 1.20 \xt Col 4:16\x* diff --git a/samples/ApiExample/data/TEA/CommentTags.xml b/samples/ApiExample/data/TEA/CommentTags.xml new file mode 100644 index 00000000..624f1523 --- /dev/null +++ b/samples/ApiExample/data/TEA/CommentTags.xml @@ -0,0 +1,5 @@ + + + + 1 + \ No newline at end of file diff --git a/samples/ApiExample/data/TEA/ProjectProgress.xml b/samples/ApiExample/data/TEA/ProjectProgress.xml new file mode 100644 index 00000000..bd16524a --- /dev/null +++ b/samples/ApiExample/data/TEA/ProjectProgress.xml @@ -0,0 +1,20 @@ + + + + None + + 000000000000000000000000000000000000000000000000000000000000000000000000000000000011000000000000000000000000000000000000001 + + + 000001111111110010000000000000010000000000000000000000000000000000111001111111001010100000000000000000000000000000000000000 + + + 110110000000001100000000000000000000000111010000000001111010001111000000000000110101000000000000000000000000000111111111111 + + + 001000000000000000111100001000000000101000100110000110000001110000000110000000000000000000000000000000000000000000000000000 + + + 000000000000000001000011110111101111010000001001111000000100000000000000000000000000010000000000000000011100000000000000000 + + \ No newline at end of file diff --git a/samples/ApiExample/data/TEA/ProjectUpdates.xml b/samples/ApiExample/data/TEA/ProjectUpdates.xml new file mode 100644 index 00000000..0bbf0e6e --- /dev/null +++ 
b/samples/ApiExample/data/TEA/ProjectUpdates.xml @@ -0,0 +1,7 @@ + + + 1FE40EDA-1D82-4ED8-95D1-5F44B8EC25CD + 207EF1E9-D931-41A0-920D-96BAEF744746 + 5C974ECE-A444-4E5A-B980-125E3CDEE7E2 + B946EEE7-B890-47FA-BBEF-8D0E6F729F82 + \ No newline at end of file diff --git a/samples/ApiExample/data/TEA/Settings.xml b/samples/ApiExample/data/TEA/Settings.xml new file mode 100644 index 00000000..43bbbf3d --- /dev/null +++ b/samples/ApiExample/data/TEA/Settings.xml @@ -0,0 +1,32 @@ + + usfm.sty + 4 + English + 8.0.100.76 + Test English Apocrypha + 65001 + T + + NFC + TEA + a7e9f1c362e728a143bb5eef7f6c79bcab2478fa + Charis SIL + 12 + + + en::: + 41MAT + + TEA.SFM + Major::BiblicalTerms.xml + F + F + F + Public + Standard:: + + 3 + 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 + 000000000000000000000000000000000000000000000000000000000000000000000000000000000011000000000000000000000000000000000000001 + + \ No newline at end of file diff --git a/samples/ApiExample/data/TEA/en.ldml b/samples/ApiExample/data/TEA/en.ldml new file mode 100644 index 00000000..87c6fb5a --- /dev/null +++ b/samples/ApiExample/data/TEA/en.ldml @@ -0,0 +1,26 @@ +[A-Za-z][!'-),-.\:;?\[\]\u00B4\u200C\u200D\u2014\u2018\u2019\u201C\u201D]['\-\u00B4\u2014][][][a b c d e f g h i j k l m n o p q r s t u v w x y z {aa} {bb} {cc} {dd} {ee} {ff} {gg} {hh} {ii} {jj} {kk} {ll} {mm} {nn} {oo} {pp} {qq} {rr} {ss} {tt} {uu} {vv} {ww} {xx} {yy} {zz}][][]left-to-rightstandard \ No newline at end of file diff --git a/samples/ApiExample/data/TEA/unique.id b/samples/ApiExample/data/TEA/unique.id new file mode 100644 index 00000000..66104d45 --- /dev/null +++ b/samples/ApiExample/data/TEA/unique.id @@ -0,0 +1 @@ +ed450f1c-1d1f-4ef1-87ac-a6b1d3b4735b \ No newline at end of file diff --git a/samples/ApiExample/data/TMA/84MANTMA.SFM b/samples/ApiExample/data/TMA/84MANTMA.SFM new file mode 100644 index 00000000..ce7aa080 --- /dev/null +++ 
b/samples/ApiExample/data/TMA/84MANTMA.SFM @@ -0,0 +1,48 @@ +\id MAN - Test Maori Apocrypha +\h +\mt1 +\imt +\ip +\c 1 +\q1 \v 1 +\q2 +\q1 +\q2 +\q1 \v 2 +\q1 \v 3 +\q2 +\q1 \v 4 +\q1 \v 5 +\q1 +\q1 \v 6 +\q1 \v 7 +\q2 +\q3 +\q1 \v 8 +\q2 +\q1 +\q2 +\q1 \v 9 +\q2 +\q1 +\q2 +\q1 \v 10 +\q2 +\q3 +\q1 +\q2 +\q3 +\q1 \v 11 +\q1 \v 12 +\q2 +\q1 \v 13 +\q2 +\q1 +\q2 +\q3 +\q1 +\q2 \v 14 +\q1 +\q2 \v 15 +\q1 +\q2 diff --git a/samples/ApiExample/data/TMA/85PS2TMA.SFM b/samples/ApiExample/data/TMA/85PS2TMA.SFM new file mode 100644 index 00000000..1a1922d6 --- /dev/null +++ b/samples/ApiExample/data/TMA/85PS2TMA.SFM @@ -0,0 +1,32 @@ +\id PS2 - Test Māori Apocrypha +\h NGA WAIATA 151 +\toc1 Ko Nga Waiata 151 +\toc2 Nga Waiata 151 +\toc3 Waiata 151 +\mt1 NGA WAIATA 151 +\imt Te Tīmatanga Kōrero +\ip +\c 1 +\cp 151 +\d Na Rawiri i tuhituhi tenei waiata ki tona ringa ake (ahakoa kei waho i te tatau), i muri i tana whawhai kotahi ki a Golia. +\q1 +\v 1 He i iti ahau waenga i oku tuākana, me te pōtiki i te whare o āku papa; +\q2 I tiaki ahau i nga hipi a toku papa. +\q1 +\v 2 I hanga e oku ringa te hapa; +\q2 i hanga e oku maihao he kutā. +\q1 +\v 3 A ma wai e korero ki toku Ariki? +\q2 Ko te Ariki tonu, e rongo ana ia. +\q1 +\v 4 I tono mai ia i tana karere ki ahau, ka tango mai i ahau i roto i nga hipi a toku papa, +\q2 a pania ana ahau e ia ki te hinu. +\q1 +\v 5 He ataahua, he nunui rawa oku teina; +\q2 otiia kihai te Ariki i ahuareka ki a ratou. +\q1 +\v 6 I haere mai ahau kia whakatau i te tangata iwi ke, +\q2 a kanga iho ahau e ia ki ana whakapakoko. +\q1 +\v 7 Na unuhia ana e ahau tana hoari, tapahia ana tona matenga e ahau, +\q2 a ka tangohia e ahau te tawai o nga tama a Iharaira. 
diff --git a/samples/ApiExample/data/TMA/BookNames.xml b/samples/ApiExample/data/TMA/BookNames.xml new file mode 100644 index 00000000..833a316b --- /dev/null +++ b/samples/ApiExample/data/TMA/BookNames.xml @@ -0,0 +1,126 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/samples/ApiExample/data/TMA/C3LAOTMA.SFM b/samples/ApiExample/data/TMA/C3LAOTMA.SFM new file mode 100644 index 00000000..9459c187 --- /dev/null +++ b/samples/ApiExample/data/TMA/C3LAOTMA.SFM @@ -0,0 +1,14 @@ +\id LAO - Test Maori Apocrypha +\h +\mt1 +\imt +\ip +\c 1 +\po \v 1 \v 2 +\p \v 3 +\p \v 4 \v 5 \v 6 \v 7 \v 8 \v 9 \v 10 \v 11 \v 12 +\p \v 13 \v 14 \v 15 \v 16 +\p \v 17 +\p \v 18 +\p \v 19 +\p \v 20 diff --git a/samples/ApiExample/data/TMA/CommentTags.xml b/samples/ApiExample/data/TMA/CommentTags.xml new file mode 100644 index 00000000..624f1523 --- /dev/null +++ b/samples/ApiExample/data/TMA/CommentTags.xml @@ -0,0 +1,5 @@ + + + + 1 + \ No newline at end of file diff --git a/samples/ApiExample/data/TMA/ProjectProgress.xml b/samples/ApiExample/data/TMA/ProjectProgress.xml new file mode 100644 index 00000000..bd16524a --- /dev/null +++ b/samples/ApiExample/data/TMA/ProjectProgress.xml @@ -0,0 +1,20 @@ + + + + None + + 000000000000000000000000000000000000000000000000000000000000000000000000000000000011000000000000000000000000000000000000001 + + + 000001111111110010000000000000010000000000000000000000000000000000111001111111001010100000000000000000000000000000000000000 + + + 110110000000001100000000000000000000000111010000000001111010001111000000000000110101000000000000000000000000000111111111111 + + + 001000000000000000111100001000000000101000100110000110000001110000000110000000000000000000000000000000000000000000000000000 + + + 
000000000000000001000011110111101111010000001001111000000100000000000000000000000000010000000000000000011100000000000000000 + + \ No newline at end of file diff --git a/samples/ApiExample/data/TMA/Settings.xml b/samples/ApiExample/data/TMA/Settings.xml new file mode 100644 index 00000000..a970e88e --- /dev/null +++ b/samples/ApiExample/data/TMA/Settings.xml @@ -0,0 +1,31 @@ + + usfm.sty + Maori + 8.0.100.76 + Test Maori Apocrypha + 65001 + T + + NFC + TMA + e1b3f0c799c4378a1757dd1b382c1dd515af37db + Charis SIL + 12 + + + mi::: + 41MAT + + TMA.SFM + Major::BiblicalTerms.xml + F + F + F + Public + Daughter:TEA:a7e9f1c362e728a143bb5eef7f6c79bcab2478fa + + 3 + 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 + 000000000000000000000000000000000000000000000000000000000000000000000000000000000011000000000000000000000000000000000000001 + + \ No newline at end of file diff --git a/samples/ApiExample/data/TMA/mi.ldml b/samples/ApiExample/data/TMA/mi.ldml new file mode 100644 index 00000000..aa095e0e --- /dev/null +++ b/samples/ApiExample/data/TMA/mi.ldml @@ -0,0 +1,15 @@ +[AEHIKM-PRTUWaehikm-prtuw\u0100\u0101\u0112\u0113\u012A\u012B\u014C\u014D\u016A\u016B{ng}{wh}][!(-*,-.\:;?\u00B6\u200C\u200D\u2010\u2014][*\-][][a e h i k m n {ng} o p r t u w {wh}][a e h i k m n {ng} o p r t u w {wh}][][]left-to-rightstandard \ No newline at end of file diff --git a/samples/ApiExample/data/TMA/unique.id b/samples/ApiExample/data/TMA/unique.id new file mode 100644 index 00000000..d3b98c55 --- /dev/null +++ b/samples/ApiExample/data/TMA/unique.id @@ -0,0 +1 @@ +f2ca92e1-0778-4424-9096-a1e64feb6123 \ No newline at end of file diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilder.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilder.cs index f8dfbcd5..ce0180b5 100644 --- a/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilder.cs +++ 
b/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilder.cs @@ -3,5 +3,5 @@ public interface IMachineBuilder { IServiceCollection Services { get; } - IConfiguration? Configuration { get; } + IConfiguration Configuration { get; } } diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilderExtensions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilderExtensions.cs index 5a577cb5..c00fd45e 100644 --- a/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilderExtensions.cs +++ b/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilderExtensions.cs @@ -1,63 +1,28 @@ -using Serval.Translation.V1; +using Polly.Extensions.Http; +using Serval.Translation.V1; namespace Microsoft.Extensions.DependencyInjection; public static class IMachineBuilderExtensions { - public static IMachineBuilder AddServiceOptions( - this IMachineBuilder builder, - Action configureOptions - ) - { - builder.Services.Configure(configureOptions); - return builder; - } - public static IMachineBuilder AddServiceOptions(this IMachineBuilder builder, IConfiguration config) { builder.Services.Configure(config); return builder; } - public static IMachineBuilder AddSmtTransferEngineOptions( - this IMachineBuilder builder, - Action configureOptions - ) - { - builder.Services.Configure(configureOptions); - return builder; - } - public static IMachineBuilder AddSmtTransferEngineOptions(this IMachineBuilder builder, IConfiguration config) { builder.Services.Configure(config); return builder; } - public static IMachineBuilder AddClearMLOptions( - this IMachineBuilder builder, - Action configureOptions - ) - { - builder.Services.Configure(configureOptions); - return builder; - } - public static IMachineBuilder AddClearMLOptions(this IMachineBuilder builder, IConfiguration config) { builder.Services.Configure(config); return builder; } - public static IMachineBuilder AddDistributedReaderWriterLockOptions( - this IMachineBuilder build, - 
Action configureOptions - ) - { - build.Services.Configure(configureOptions); - return build; - } - public static IMachineBuilder AddDistributedReaderWriterLockOptions( this IMachineBuilder build, IConfiguration config @@ -67,45 +32,18 @@ IConfiguration config return build; } - public static IMachineBuilder AddMessageOutboxOptions( - this IMachineBuilder builder, - Action configureOptions - ) - { - builder.Services.Configure(configureOptions); - return builder; - } - public static IMachineBuilder AddMessageOutboxOptions(this IMachineBuilder builder, IConfiguration config) { builder.Services.Configure(config); return builder; } - public static IMachineBuilder AddSharedFileOptions( - this IMachineBuilder builder, - Action configureOptions - ) - { - builder.Services.Configure(configureOptions); - return builder; - } - public static IMachineBuilder AddSharedFileOptions(this IMachineBuilder builder, IConfiguration config) { builder.Services.Configure(config); return builder; } - public static IMachineBuilder AddBuildJobOptions( - this IMachineBuilder builder, - Action configureOptions - ) - { - builder.Services.Configure(configureOptions); - return builder; - } - public static IMachineBuilder AddBuildJobOptions(this IMachineBuilder builder, IConfiguration config) { builder.Services.Configure(config); @@ -114,20 +52,7 @@ public static IMachineBuilder AddBuildJobOptions(this IMachineBuilder builder, I public static IMachineBuilder AddThotSmtModel(this IMachineBuilder builder) { - if (builder.Configuration is null) - return builder.AddThotSmtModel(o => { }); - else - return builder.AddThotSmtModel(builder.Configuration.GetSection(ThotSmtModelOptions.Key)); - } - - public static IMachineBuilder AddThotSmtModel( - this IMachineBuilder builder, - Action configureOptions - ) - { - builder.Services.Configure(configureOptions); - builder.Services.AddSingleton(); - return builder; + return builder.AddThotSmtModel(builder.Configuration.GetSection(ThotSmtModelOptions.Key)); } 
public static IMachineBuilder AddThotSmtModel(this IMachineBuilder builder, IConfiguration config) @@ -151,17 +76,38 @@ public static IMachineBuilder AddUnigramTruecaser(this IMachineBuilder builder) public static IMachineBuilder AddClearMLService(this IMachineBuilder builder, string? connectionString = null) { - connectionString ??= builder.Configuration?.GetConnectionString("ClearML"); + connectionString ??= builder.Configuration.GetConnectionString("ClearML"); if (connectionString is null) throw new InvalidOperationException("ClearML connection string is required"); + var policy = Policy + .Handle() + .OrTransientHttpStatusCode() + .OrResult(msg => msg.StatusCode == HttpStatusCode.TooManyRequests) + .WaitAndRetryAsync( + 7, + retryAttempt => TimeSpan.FromSeconds(2 * retryAttempt), // total 56, less than the 1 minute limit + onRetryAsync: (outcome, timespan, retryAttempt, context) => + { + if (retryAttempt < 3) + return Task.CompletedTask; + // Log the retry attempt + var serviceProvider = builder.Services.BuildServiceProvider(); + var logger = serviceProvider.GetService>(); + logger?.LogInformation( + "Retry {RetryAttempt} encountered an error. Waiting {Timespan} before next retry. Error: {ErrorMessage}", + retryAttempt, + timespan, + outcome.Exception?.Message + ); + return Task.CompletedTask; + } + ); + builder .Services.AddHttpClient("ClearML") .ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString!)) - // Add retry policy; fail after approx. 2 + 4 + 8 = 14 seconds - .AddTransientHttpErrorPolicy(b => - b.WaitAndRetryAsync(3, retryAttempt => TimeSpan.FromSeconds(Math.Pow(2, retryAttempt))) - ); + .AddPolicyHandler(policy); builder.Services.AddSingleton(); @@ -199,7 +145,7 @@ public static IMachineBuilder AddMongoHangfireJobClient( string? 
connectionString = null ) { - connectionString ??= builder.Configuration?.GetConnectionString("Hangfire"); + connectionString ??= builder.Configuration.GetConnectionString("Hangfire"); if (connectionString is null) throw new InvalidOperationException("Hangfire connection string is required"); @@ -220,7 +166,7 @@ public static IMachineBuilder AddHangfireJobServer( ) { engineTypes ??= - builder.Configuration?.GetSection("TranslationEngines").Get() + builder.Configuration.GetSection("TranslationEngines").Get() ?? [TranslationEngineType.SmtTransfer, TranslationEngineType.Nmt]; var queues = new List(); foreach (TranslationEngineType engineType in engineTypes.Distinct()) @@ -261,7 +207,7 @@ public static IMachineBuilder AddMemoryDataAccess(this IMachineBuilder builder) public static IMachineBuilder AddMongoDataAccess(this IMachineBuilder builder, string? connectionString = null) { - connectionString ??= builder.Configuration?.GetConnectionString("Mongo"); + connectionString ??= builder.Configuration.GetConnectionString("Mongo"); if (connectionString is null) throw new InvalidOperationException("Mongo connection string is required"); builder.Services.AddMongoDataAccess( @@ -316,7 +262,7 @@ public static IMachineBuilder AddServalPlatformService( string? connectionString = null ) { - connectionString ??= builder.Configuration?.GetConnectionString("Serval"); + connectionString ??= builder.Configuration.GetConnectionString("Serval"); if (connectionString is null) throw new InvalidOperationException("Serval connection string is required"); @@ -383,7 +329,7 @@ public static IMachineBuilder AddServalTranslationEngineService( builder.AddServalPlatformService(connectionString); engineTypes ??= - builder.Configuration?.GetSection("TranslationEngines").Get() + builder.Configuration.GetSection("TranslationEngines").Get() ?? 
[TranslationEngineType.SmtTransfer, TranslationEngineType.Nmt]; foreach (TranslationEngineType engineType in engineTypes.Distinct()) { @@ -422,7 +368,7 @@ public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder, s if (smtTransferEngineDir is null) { var smtTransferEngineOptions = new SmtTransferEngineOptions(); - builder.Configuration?.GetSection(SmtTransferEngineOptions.Key).Bind(smtTransferEngineOptions); + builder.Configuration.GetSection(SmtTransferEngineOptions.Key).Bind(smtTransferEngineOptions); smtTransferEngineDir = smtTransferEngineOptions.EnginesDir; } string? driveLetter = Path.GetPathRoot(smtTransferEngineDir)?[..1]; diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/IServiceCollectionExtensions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/IServiceCollectionExtensions.cs index 9ae176d8..c72302b9 100644 --- a/src/Machine/src/Serval.Machine.Shared/Configuration/IServiceCollectionExtensions.cs +++ b/src/Machine/src/Serval.Machine.Shared/Configuration/IServiceCollectionExtensions.cs @@ -2,7 +2,7 @@ public static class IServiceCollectionExtensions { - public static IMachineBuilder AddMachine(this IServiceCollection services, IConfiguration? 
configuration = null) + public static IMachineBuilder AddMachine(this IServiceCollection services, IConfiguration configuration) { if (!Sldr.IsInitialized) Sldr.Initialize(); @@ -22,28 +22,13 @@ public static IMachineBuilder AddMachine(this IServiceCollection services, IConf ); var builder = new MachineBuilder(services, configuration); - if (configuration is null) - { - builder.AddServiceOptions(o => { }); - builder.AddSharedFileOptions(o => { }); - builder.AddSmtTransferEngineOptions(o => { }); - builder.AddClearMLOptions(o => { }); - builder.AddDistributedReaderWriterLockOptions(o => { }); - builder.AddBuildJobOptions(o => { }); - builder.AddMessageOutboxOptions(o => { }); - } - else - { - builder.AddServiceOptions(configuration.GetSection(ServiceOptions.Key)); - builder.AddSharedFileOptions(configuration.GetSection(SharedFileOptions.Key)); - builder.AddSmtTransferEngineOptions(configuration.GetSection(SmtTransferEngineOptions.Key)); - builder.AddClearMLOptions(configuration.GetSection(ClearMLOptions.Key)); - builder.AddDistributedReaderWriterLockOptions( - configuration.GetSection(DistributedReaderWriterLockOptions.Key) - ); - builder.AddBuildJobOptions(configuration.GetSection(BuildJobOptions.Key)); - builder.AddMessageOutboxOptions(configuration.GetSection(MessageOutboxOptions.Key)); - } + builder.AddServiceOptions(configuration.GetSection(ServiceOptions.Key)); + builder.AddSharedFileOptions(configuration.GetSection(SharedFileOptions.Key)); + builder.AddSmtTransferEngineOptions(configuration.GetSection(SmtTransferEngineOptions.Key)); + builder.AddClearMLOptions(configuration.GetSection(ClearMLOptions.Key)); + builder.AddDistributedReaderWriterLockOptions(configuration.GetSection(DistributedReaderWriterLockOptions.Key)); + builder.AddBuildJobOptions(configuration.GetSection(BuildJobOptions.Key)); + builder.AddMessageOutboxOptions(configuration.GetSection(MessageOutboxOptions.Key)); return builder; } diff --git 
a/src/Machine/src/Serval.Machine.Shared/Configuration/MachineBuilder.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/MachineBuilder.cs index 58ddf5c1..5fece454 100644 --- a/src/Machine/src/Serval.Machine.Shared/Configuration/MachineBuilder.cs +++ b/src/Machine/src/Serval.Machine.Shared/Configuration/MachineBuilder.cs @@ -1,7 +1,7 @@ namespace Microsoft.Extensions.DependencyInjection; -internal class MachineBuilder(IServiceCollection services, IConfiguration? configuration) : IMachineBuilder +internal class MachineBuilder(IServiceCollection services, IConfiguration configuration) : IMachineBuilder { public IServiceCollection Services { get; } = services; - public IConfiguration? Configuration { get; } = configuration; + public IConfiguration Configuration { get; } = configuration; } diff --git a/src/Machine/src/Serval.Machine.Shared/Models/Build.cs b/src/Machine/src/Serval.Machine.Shared/Models/Build.cs index b3578537..aca20540 100644 --- a/src/Machine/src/Serval.Machine.Shared/Models/Build.cs +++ b/src/Machine/src/Serval.Machine.Shared/Models/Build.cs @@ -29,5 +29,4 @@ public record Build public required BuildJobRunnerType BuildJobRunner { get; init; } public required BuildStage Stage { get; init; } public string? Options { get; set; } - public Dictionary? 
Statistics { get; set; } } diff --git a/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj b/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj index 3091b02f..b9985198 100644 --- a/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj +++ b/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj @@ -36,9 +36,9 @@ - - - + + + diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ClearMLService.cs b/src/Machine/src/Serval.Machine.Shared/Services/ClearMLService.cs index 2b2b6718..66e1b350 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/ClearMLService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ClearMLService.cs @@ -161,7 +161,7 @@ public async Task> GetTasksForQueueAsync( var body = new JsonObject { ["queue"] = queueId }; JsonObject? result = await CallAsync("queues", "get_by_id", body, cancellationToken); var tasks = (JsonArray?)result?["data"]?["queue"]?["entries"]; - IEnumerable taskIds = tasks?.Select(t => (string)t?["id"]!) ?? new List(); + IEnumerable taskIds = tasks?.Select(t => (string)t?["task"]!) ?? 
new List(); return await GetTasksByIdAsync(taskIds, cancellationToken); } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/IPlatformService.cs b/src/Machine/src/Serval.Machine.Shared/Services/IPlatformService.cs index 5dfae87b..69c4c7d2 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/IPlatformService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/IPlatformService.cs @@ -28,10 +28,10 @@ Task InsertPretranslationsAsync( CancellationToken cancellationToken = default ); - Task UpdateBuildStatisticsAsync( + Task UpdateBuildExecutionDataAsync( string engineId, string buildId, - IDictionary statistics, + IReadOnlyDictionary executionData, CancellationToken cancellationToken = default ); } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/PreprocessBuildJob.cs b/src/Machine/src/Serval.Machine.Shared/Services/PreprocessBuildJob.cs index 6d00355f..6d8506a0 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/PreprocessBuildJob.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/PreprocessBuildJob.cs @@ -82,12 +82,12 @@ CancellationToken cancellationToken ); } - var statistics = new Dictionary() + var executionData = new Dictionary() { - { "initialTrainCount", trainCount.ToString(CultureInfo.InvariantCulture) }, - { "initialPretranslateCount", pretranslateCount.ToString(CultureInfo.InvariantCulture) } + { "trainCount", trainCount.ToString(CultureInfo.InvariantCulture) }, + { "pretranslateCount", pretranslateCount.ToString(CultureInfo.InvariantCulture) } }; - await PlatformService.UpdateBuildStatisticsAsync(engineId, buildId, statistics, cancellationToken); + await PlatformService.UpdateBuildExecutionDataAsync(engineId, buildId, executionData, cancellationToken); cancellationToken.ThrowIfCancellationRequested(); @@ -146,12 +146,16 @@ row.Ref is not ScriptureRef sr ); }) .ToArray(); - ITextCorpus[] sourcePretranslateCorpora = sourceCorpora + ITextCorpus? 
sourcePretranslateCorpus = sourceCorpora .Select(sc => { ITextCorpus textCorpus = sc.TextCorpus; if (sc.Corpus.PretranslateTextIds is not null) - textCorpus = textCorpus.FilterTexts(sc.Corpus.PretranslateTextIds); + { + textCorpus = textCorpus.FilterTexts( + sc.Corpus.PretranslateTextIds.Except(sc.Corpus.TrainOnTextIds ?? new()) + ); + } return textCorpus.Where(row => row.Ref is not ScriptureRef sr || sc.Corpus.PretranslateChapters is null @@ -161,7 +165,8 @@ row.Ref is not ScriptureRef sr ) ); }) - .ToArray(); + .ToArray() + .FirstOrDefault(); (MonolingualCorpus Corpus, ITextCorpus TextCorpus)[] targetCorpora = corpus .TargetCorpora.SelectMany(c => _corpusService.CreateTextCorpora(c.Files).Select(tc => (c, tc))) @@ -261,11 +266,13 @@ void WriteRow(Utf8JsonWriter writer, string textId, IReadOnlyList refs, ITextCorpus targetCorpus = targetCorpora.Length > 0 ? targetCorpora[0].TextCorpus : new DictionaryTextCorpus(); - - foreach (Row row in AlignPretranslateCorpus(sourcePretranslateCorpora, targetCorpus)) + if (sourcePretranslateCorpus != null) { - if (row.SourceSegment.Length > 0) - WriteRow(pretranslateWriter, row.TextId, row.Refs, row.SourceSegment); + foreach (Row row in AlignPretranslateCorpus(sourcePretranslateCorpus, targetCorpus)) + { + if (row.SourceSegment.Length > 0 && (row.TargetSegment.Length == 0 || !targetCorpus.Any())) + WriteRow(pretranslateWriter, row.TextId, row.Refs, row.SourceSegment); + } } } @@ -422,14 +429,18 @@ IReadOnlyList trgCorpora } } - private static IEnumerable AlignPretranslateCorpus(ITextCorpus[] srcCorpora, ITextCorpus trgCorpus) + private static IEnumerable AlignPretranslateCorpus(ITextCorpus srcCorpus, ITextCorpus trgCorpus) { int rowCount = 0; StringBuilder srcSegBuffer = new(); StringBuilder trgSegBuffer = new(); List refs = []; string textId = ""; - foreach (ParallelTextRow row in srcCorpora.SelectMany(sc => sc.AlignRows(trgCorpus, allSourceRows: true))) + + srcCorpus = srcCorpus.Transform(CleanSegment); + trgCorpus = 
trgCorpus.Transform(CleanSegment); + + foreach (ParallelTextRow row in srcCorpus.AlignRows(trgCorpus, allSourceRows: true)) { if (!row.IsTargetRangeStart && row.IsTargetInRange) { @@ -446,7 +457,8 @@ private static IEnumerable AlignPretranslateCorpus(ITextCorpus[] srcCorpora { if (rowCount > 0) { - yield return new(textId, refs, srcSegBuffer.ToString(), trgSegBuffer.ToString(), 1); + if (trgSegBuffer.Length == 0) + yield return new(textId, refs, srcSegBuffer.ToString(), trgSegBuffer.ToString(), 1); textId = ""; srcSegBuffer.Clear(); trgSegBuffer.Clear(); diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformOutboxConstants.cs b/src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformOutboxConstants.cs index 98342460..1ad8d425 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformOutboxConstants.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformOutboxConstants.cs @@ -11,5 +11,5 @@ public static class ServalPlatformOutboxConstants public const string BuildRestarting = "BuildRestarting"; public const string InsertPretranslations = "InsertPretranslations"; public const string IncrementTranslationEngineCorpusSize = "IncrementTranslationEngineCorpusSize"; - public const string UpdateBuildStatistics = "UpdateBuildStatistics"; + public const string UpdateBuildExecutionData = "UpdateBuildExecutionData"; } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformOutboxMessageHandler.cs b/src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformOutboxMessageHandler.cs index 4cd2e018..490ed650 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformOutboxMessageHandler.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformOutboxMessageHandler.cs @@ -85,9 +85,9 @@ await _client.IncrementTranslationEngineCorpusSizeAsync( cancellationToken: cancellationToken ); break; - case ServalPlatformOutboxConstants.UpdateBuildStatistics: - await 
_client.UpdateBuildStatisticsAsync( - JsonSerializer.Deserialize(content!), + case ServalPlatformOutboxConstants.UpdateBuildExecutionData: + await _client.UpdateBuildExecutionDataAsync( + JsonSerializer.Deserialize(content!), cancellationToken: cancellationToken ); break; diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformService.cs b/src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformService.cs index 312e6f45..fece316c 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformService.cs @@ -138,18 +138,18 @@ await _outboxService.EnqueueMessageAsync( ); } - public async Task UpdateBuildStatisticsAsync( + public async Task UpdateBuildExecutionDataAsync( string engineId, string buildId, - IDictionary statistics, + IReadOnlyDictionary executionData, CancellationToken cancellationToken = default ) { - var request = new UpdateBuildStatisticsRequest { EngineId = engineId, BuildId = buildId }; - request.Statistics.Add(statistics); + var request = new UpdateBuildExecutionDataRequest { EngineId = engineId, BuildId = buildId }; + request.ExecutionData.Add((IDictionary)executionData); await _outboxService.EnqueueMessageAsync( ServalPlatformOutboxConstants.OutboxId, - ServalPlatformOutboxConstants.UpdateBuildStatistics, + ServalPlatformOutboxConstants.UpdateBuildExecutionData, engineId, JsonSerializer.Serialize(request), cancellationToken: cancellationToken diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs b/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs index 539b9c4c..d29f2213 100644 --- a/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs @@ -65,7 +65,7 @@ public async Task RunAsync_TrainAndPretranslateAll() await env.RunBuildJobAsync(corpus1); - 
Assert.That(await env.GetPretranslateCountAsync(), Is.EqualTo(4)); + Assert.That(await env.GetPretranslateCountAsync(), Is.EqualTo(2)); } [Test] @@ -76,7 +76,7 @@ public async Task RunAsync_PretranslateAll() await env.RunBuildJobAsync(corpus1); - Assert.That(await env.GetPretranslateCountAsync(), Is.EqualTo(4)); + Assert.That(await env.GetPretranslateCountAsync(), Is.EqualTo(2)); } [Test] @@ -87,7 +87,7 @@ public async Task RunAsync_PretranslateTextIds() await env.RunBuildJobAsync(corpus1); - Assert.That(await env.GetPretranslateCountAsync(), Is.EqualTo(4)); + Assert.That(await env.GetPretranslateCountAsync(), Is.EqualTo(2)); } [Test] @@ -189,7 +189,11 @@ public async Task RunAsync_MixedSource_Paratext() Assert.That(trgCount, Is.EqualTo(1)); Assert.That(termCount, Is.EqualTo(0)); }); - Assert.That(await env.GetPretranslateCountAsync(), Is.EqualTo(56)); + Assert.That( + await env.GetPretranslateCountAsync(), + Is.EqualTo(13), + (await env.GetPretranslationsAsync())?.ToJsonString() + ); } [Test] @@ -208,7 +212,11 @@ public async Task RunAsync_MixedSource_Text() Assert.That(trgCount, Is.EqualTo(1)); Assert.That(termCount, Is.EqualTo(0)); }); - Assert.That(await env.GetPretranslateCountAsync(), Is.EqualTo(9)); + Assert.That( + await env.GetPretranslateCountAsync(), + Is.EqualTo(2), + (await env.GetPretranslationsAsync())?.ToJsonString() + ); } [Test] @@ -471,7 +479,7 @@ await env.GetTargetExtractAsync(), }); JsonArray? pretranslations = await env.GetPretranslationsAsync(); Assert.That(pretranslations, Is.Not.Null); - Assert.That(pretranslations!.Count, Is.EqualTo(37), pretranslations.ToJsonString()); + Assert.That(pretranslations!.Count, Is.EqualTo(7), pretranslations.ToJsonString()); Assert.That( pretranslations[2]!["translation"]!.ToString(), Is.EqualTo("Source one, chapter twelve, verse one.") @@ -1010,7 +1018,8 @@ public async Task GetTargetExtractAsync() public async Task GetPretranslateCountAsync() { - return (await GetPretranslationsAsync())?.Count ?? 
0; + var pretranslations = await GetPretranslationsAsync(); + return pretranslations?.Count ?? 0; } private void ZipParatextProject(string name) diff --git a/src/Serval/src/Serval.Assessment/Configuration/IServalBuilderExtensions.cs b/src/Serval/src/Serval.Assessment/Configuration/IServalBuilderExtensions.cs index d770433d..ee82803b 100644 --- a/src/Serval/src/Serval.Assessment/Configuration/IServalBuilderExtensions.cs +++ b/src/Serval/src/Serval.Assessment/Configuration/IServalBuilderExtensions.cs @@ -5,27 +5,17 @@ namespace Microsoft.Extensions.DependencyInjection; public static class IServalBuilderExtensions { - public static IServalBuilder AddAssessment(this IServalBuilder builder, Action? configure = null) + public static IServalBuilder AddAssessment(this IServalBuilder builder) { - if (builder.Configuration is null) - { - builder.AddApiOptions(o => { }); - builder.AddDataFileOptions(o => { }); - } - else - { - builder.AddApiOptions(builder.Configuration.GetSection(ApiOptions.Key)); - builder.AddDataFileOptions(builder.Configuration.GetSection(DataFileOptions.Key)); - } + builder.AddApiOptions(builder.Configuration.GetSection(ApiOptions.Key)); + builder.AddDataFileOptions(builder.Configuration.GetSection(DataFileOptions.Key)); builder.Services.AddScoped(); builder.Services.AddScoped(); builder.Services.AddScoped(); var assessmentOptions = new AssessmentOptions(); - builder.Configuration?.GetSection(AssessmentOptions.Key).Bind(assessmentOptions); - if (configure is not null) - configure(assessmentOptions); + builder.Configuration.GetSection(AssessmentOptions.Key).Bind(assessmentOptions); foreach (EngineInfo engine in assessmentOptions.Engines) { diff --git a/src/Serval/src/Serval.Client/Client.g.cs b/src/Serval/src/Serval.Client/Client.g.cs index 1b5d4721..5bc156cf 100644 --- a/src/Serval/src/Serval.Client/Client.g.cs +++ b/src/Serval/src/Serval.Client/Client.g.cs @@ -4218,7 +4218,7 @@ public partial interface ITranslationEnginesClient /// A cancellation token 
that can be used by other objects or threads to receive notice of cancellation. /// - /// Add a corpus to a translation engine + /// Add a corpus to a translation engine (obsolete - use parallel corpora instead) /// /// /// ## Parameters @@ -4242,20 +4242,22 @@ public partial interface ITranslationEnginesClient /// The corpus configuration (see remarks) /// The added corpus /// A server side error occurred. + [System.Obsolete] System.Threading.Tasks.Task AddCorpusAsync(string id, TranslationCorpusConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get all corpora for a translation engine + /// Get all corpora for a translation engine (obsolete - use parallel corpora instead) /// /// The translation engine id /// The corpora /// A server side error occurred. + [System.Obsolete] System.Threading.Tasks.Task> GetAllCorporaAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Update a corpus with a new set of files + /// Update a corpus with a new set of files (obsolete - use parallel corpora instead) /// /// /// See posting a new corpus for details of use. Will completely replace corpus' file associations. @@ -4266,16 +4268,18 @@ public partial interface ITranslationEnginesClient /// The corpus configuration /// The corpus was updated successfully /// A server side error occurred. 
+ [System.Obsolete] System.Threading.Tasks.Task UpdateCorpusAsync(string id, string corpusId, TranslationCorpusUpdateConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get the configuration of a corpus for a translation engine + /// Get the configuration of a corpus for a translation engine (obsolete - use parallel corpora instead) /// /// The translation engine id /// The corpus id /// The corpus configuration /// A server side error occurred. + [System.Obsolete] System.Threading.Tasks.Task GetCorpusAsync(string id, string corpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. @@ -4355,7 +4359,7 @@ public partial interface ITranslationEnginesClient /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get all pretranslations in a corpus of a translation engine + /// Get all pretranslations in a corpus or parallel corpus of a translation engine /// /// /// Pretranslations are arranged in a list of dictionaries with the following fields per pretranslation: @@ -4369,7 +4373,7 @@ public partial interface ITranslationEnginesClient ///
Only pretranslations for the most recent successful build of the engine are returned. ///
/// The translation engine id - /// The corpus id + /// The corpus id or parallel corpus id /// The text id (optional) /// The pretranslations /// A server side error occurred. @@ -4377,7 +4381,7 @@ public partial interface ITranslationEnginesClient /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get all pretranslations for the specified text in a corpus of a translation engine + /// Get all pretranslations for the specified text in a corpus or parallel corpus of a translation engine /// /// /// Pretranslations are arranged in a list of dictionaries with the following fields per pretranslation: @@ -4390,7 +4394,7 @@ public partial interface ITranslationEnginesClient ///
Only pretranslations for the most recent successful build of the engine are returned. ///
/// The translation engine id - /// The corpus id + /// The corpus id or parallel corpus id /// The text id /// The pretranslations /// A server side error occurred. @@ -4416,7 +4420,7 @@ public partial interface ITranslationEnginesClient ///
Both scripture and non-scripture text in the USFM is parsed and grouped according to [this wiki](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation). ///
/// The translation engine id - /// The corpus id + /// The corpus id or parallel corpus id /// The text id /// The source[s] of the data to populate the USFM file with. /// The book in USFM format @@ -4437,10 +4441,21 @@ public partial interface ITranslationEnginesClient /// Starts a build job for a translation engine. /// /// - /// Specify the corpora and textIds/scriptureRanges within those corpora to train on. Only one type of corpus may be used: either corpora (see /translation/engines/{id}/corpora) or parallel corpora (see /translation/engines/{id}/parallel-corpora). If no "trainOn" field is provided, all corpora will be used. - ///
Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training. - ///
Filters can also be supplied via scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range) - ///
All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. + /// Specify the corpora and textIds/scriptureRanges within those corpora to train on. Only one type of corpus may be used: either (legacy) corpora (see /translation/engines/{id}/corpora) or parallel corpora (see /translation/engines/{id}/parallel-corpora). + ///
Specifying a corpus: + ///
* A (legacy) corpus is selected by specifying CorpusId and a parallel corpus is selected by specifying ParallelCorpusId. + ///
* A parallel corpus can be further filtered by specifying particular CorpusIds in SourceFilters or TargetFilters. + ///
+ ///
Filtering by textId or chapter: + ///
* Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training. + ///
* Filters can also be supplied via the scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range). + ///
* All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. + ///
+ ///
Filter - train on all or none + ///
* If trainOn or pretranslate is not provided, all corpora will be used for training or pretranslation respectively. + ///
* If a corpus is selected for training or pretranslation and neither scriptureRange nor textIds are defined, all of the selected corpus will be used. + ///
* If a corpus is selected for training or pretranslation and an empty scriptureRange or textIds is defined, none of the selected corpus will be used. + ///
* If a corpus is selected for training or pretranslation but no further filters are provided, all selected corpora will be used for training or pretranslation respectively. ///
///
Specify the corpora and textIds/scriptureRanges within those corpora to pretranslate. When a corpus is selected for pretranslation, ///
the following text will be pretranslated: @@ -5531,7 +5546,7 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Add a corpus to a translation engine + /// Add a corpus to a translation engine (obsolete - use parallel corpora instead) /// /// /// ## Parameters @@ -5555,6 +5570,7 @@ public string BaseUrl /// The corpus configuration (see remarks) /// The added corpus /// A server side error occurred. + [System.Obsolete] public virtual async System.Threading.Tasks.Task AddCorpusAsync(string id, TranslationCorpusConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) @@ -5667,11 +5683,12 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get all corpora for a translation engine + /// Get all corpora for a translation engine (obsolete - use parallel corpora instead) /// /// The translation engine id /// The corpora /// A server side error occurred. + [System.Obsolete] public virtual async System.Threading.Tasks.Task> GetAllCorporaAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) @@ -5771,7 +5788,7 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Update a corpus with a new set of files + /// Update a corpus with a new set of files (obsolete - use parallel corpora instead) /// /// /// See posting a new corpus for details of use. Will completely replace corpus' file associations. @@ -5782,6 +5799,7 @@ public string BaseUrl /// The corpus configuration /// The corpus was updated successfully /// A server side error occurred. 
+ [System.Obsolete] public virtual async System.Threading.Tasks.Task UpdateCorpusAsync(string id, string corpusId, TranslationCorpusUpdateConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) @@ -5898,12 +5916,13 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get the configuration of a corpus for a translation engine + /// Get the configuration of a corpus for a translation engine (obsolete - use parallel corpora instead) /// /// The translation engine id /// The corpus id /// The corpus configuration /// A server side error occurred. + [System.Obsolete] public virtual async System.Threading.Tasks.Task GetCorpusAsync(string id, string corpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) @@ -6688,7 +6707,7 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get all pretranslations in a corpus of a translation engine + /// Get all pretranslations in a corpus or parallel corpus of a translation engine /// /// /// Pretranslations are arranged in a list of dictionaries with the following fields per pretranslation: @@ -6702,7 +6721,7 @@ public string BaseUrl ///
Only pretranslations for the most recent successful build of the engine are returned. ///
/// The translation engine id - /// The corpus id + /// The corpus id or parallel corpus id /// The text id (optional) /// The pretranslations /// A server side error occurred. @@ -6822,7 +6841,7 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get all pretranslations for the specified text in a corpus of a translation engine + /// Get all pretranslations for the specified text in a corpus or parallel corpus of a translation engine /// /// /// Pretranslations are arranged in a list of dictionaries with the following fields per pretranslation: @@ -6835,7 +6854,7 @@ public string BaseUrl ///
Only pretranslations for the most recent successful build of the engine are returned. ///
/// The translation engine id - /// The corpus id + /// The corpus id or parallel corpus id /// The text id /// The pretranslations /// A server side error occurred. @@ -6971,7 +6990,7 @@ public string BaseUrl ///
Both scripture and non-scripture text in the USFM is parsed and grouped according to [this wiki](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation). ///
/// The translation engine id - /// The corpus id + /// The corpus id or parallel corpus id /// The text id /// The source[s] of the data to populate the USFM file with. /// The book in USFM format @@ -7217,10 +7236,21 @@ public string BaseUrl /// Starts a build job for a translation engine. /// /// - /// Specify the corpora and textIds/scriptureRanges within those corpora to train on. Only one type of corpus may be used: either corpora (see /translation/engines/{id}/corpora) or parallel corpora (see /translation/engines/{id}/parallel-corpora). If no "trainOn" field is provided, all corpora will be used. - ///
Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training. - ///
Filters can also be supplied via scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range) - ///
All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. + /// Specify the corpora and textIds/scriptureRanges within those corpora to train on. Only one type of corpus may be used: either (legacy) corpora (see /translation/engines/{id}/corpora) or parallel corpora (see /translation/engines/{id}/parallel-corpora). + ///
Specifying a corpus: + ///
* A (legacy) corpus is selected by specifying CorpusId and a parallel corpus is selected by specifying ParallelCorpusId. + ///
* A parallel corpus can be further filtered by specifying particular CorpusIds in SourceFilters or TargetFilters. + ///
+ ///
Filtering by textId or chapter: + ///
* Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training. + ///
* Filters can also be supplied via the scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range). + ///
* All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. + ///
+ ///
Filter - train on all or none + ///
* If trainOn or pretranslate is not provided, all corpora will be used for training or pretranslation respectively. + ///
* If a corpus is selected for training or pretranslation and neither scriptureRange nor textIds are defined, all of the selected corpus will be used. + ///
* If a corpus is selected for training or pretranslation and an empty scriptureRange or textIds is defined, none of the selected corpus will be used. + ///
* If a corpus is selected for training or pretranslation but no further filters are provided, all selected corpora will be used for training or pretranslation respectively. ///
///
Specify the corpora and textIds/scriptureRanges within those corpora to pretranslate. When a corpus is selected for pretranslation, ///
the following text will be pretranslated: @@ -9819,8 +9849,8 @@ public partial class TranslationBuild [Newtonsoft.Json.JsonProperty("deploymentVersion", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public string? DeploymentVersion { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("statistics", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public System.Collections.Generic.IList>? Statistics { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("executionData", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IDictionary? ExecutionData { get; set; } = default!; } @@ -9828,12 +9858,15 @@ public partial class TranslationBuild public partial class TrainingCorpus { [Newtonsoft.Json.JsonProperty("corpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] public ResourceLink? Corpus { get; set; } = default!; [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] public System.Collections.Generic.IList? TextIds { get; set; } = default!; [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] public string? 
ScriptureRange { get; set; } = default!; [Newtonsoft.Json.JsonProperty("parallelCorpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] @@ -9866,12 +9899,15 @@ public partial class ParallelCorpusFilter public partial class PretranslateCorpus { [Newtonsoft.Json.JsonProperty("corpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] public ResourceLink? Corpus { get; set; } = default!; [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] public System.Collections.Generic.IList? TextIds { get; set; } = default!; [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] public string? ScriptureRange { get; set; } = default!; [Newtonsoft.Json.JsonProperty("parallelCorpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] @@ -9903,12 +9939,15 @@ public partial class TranslationBuildConfig public partial class TrainingCorpusConfig { [Newtonsoft.Json.JsonProperty("corpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] public string? CorpusId { get; set; } = default!; [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] public System.Collections.Generic.IList? TextIds { get; set; } = default!; [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] public string? 
ScriptureRange { get; set; } = default!; [Newtonsoft.Json.JsonProperty("parallelCorpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] @@ -9941,12 +9980,15 @@ public partial class ParallelCorpusFilterConfig public partial class PretranslateCorpusConfig { [Newtonsoft.Json.JsonProperty("corpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] public string? CorpusId { get; set; } = default!; [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] public System.Collections.Generic.IList? TextIds { get; set; } = default!; [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] public string? ScriptureRange { get; set; } = default!; [Newtonsoft.Json.JsonProperty("parallelCorpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] diff --git a/src/Serval/src/Serval.Client/Serval.Client.csproj b/src/Serval/src/Serval.Client/Serval.Client.csproj index 4075c023..66ed8ebe 100644 --- a/src/Serval/src/Serval.Client/Serval.Client.csproj +++ b/src/Serval/src/Serval.Client/Serval.Client.csproj @@ -2,7 +2,7 @@ netstandard2.1 - 1.7.0 + 1.7.3 Client classes for Serval. 
Serval.Client Serval diff --git a/src/Serval/src/Serval.DataFiles/Configuration/IServalBuilderExtensions.cs b/src/Serval/src/Serval.DataFiles/Configuration/IServalBuilderExtensions.cs index 91756a6c..11af65e1 100644 --- a/src/Serval/src/Serval.DataFiles/Configuration/IServalBuilderExtensions.cs +++ b/src/Serval/src/Serval.DataFiles/Configuration/IServalBuilderExtensions.cs @@ -4,10 +4,7 @@ public static class IServalBuilderExtensions { public static IServalBuilder AddDataFiles(this IServalBuilder builder) { - if (builder.Configuration is null) - builder.AddDataFileOptions(o => { }); - else - builder.AddDataFileOptions(builder.Configuration.GetSection(DataFileOptions.Key)); + builder.AddDataFileOptions(builder.Configuration.GetSection(DataFileOptions.Key)); builder.Services.AddScoped(); builder.Services.AddHostedService(); diff --git a/src/Serval/src/Serval.Grpc/Protos/serval/translation/v1/platform.proto b/src/Serval/src/Serval.Grpc/Protos/serval/translation/v1/platform.proto index 385159f0..17788e91 100644 --- a/src/Serval/src/Serval.Grpc/Protos/serval/translation/v1/platform.proto +++ b/src/Serval/src/Serval.Grpc/Protos/serval/translation/v1/platform.proto @@ -14,7 +14,7 @@ service TranslationPlatformApi { rpc IncrementTranslationEngineCorpusSize(IncrementTranslationEngineCorpusSizeRequest) returns (google.protobuf.Empty); rpc InsertPretranslations(stream InsertPretranslationsRequest) returns (google.protobuf.Empty); - rpc UpdateBuildStatistics(UpdateBuildStatisticsRequest) returns (google.protobuf.Empty); + rpc UpdateBuildExecutionData(UpdateBuildExecutionDataRequest) returns (google.protobuf.Empty); } message UpdateBuildStatusRequest { @@ -61,8 +61,8 @@ message InsertPretranslationsRequest { string translation = 5; } -message UpdateBuildStatisticsRequest { +message UpdateBuildExecutionDataRequest { string engine_id = 1; string build_id = 2; - map Statistics = 3; + map ExecutionData = 3; } diff --git a/src/Serval/src/Serval.Shared/Configuration/IServalBuilder.cs 
b/src/Serval/src/Serval.Shared/Configuration/IServalBuilder.cs index 116fc6d4..f37283e3 100644 --- a/src/Serval/src/Serval.Shared/Configuration/IServalBuilder.cs +++ b/src/Serval/src/Serval.Shared/Configuration/IServalBuilder.cs @@ -3,5 +3,5 @@ public interface IServalBuilder { IServiceCollection Services { get; } - IConfiguration? Configuration { get; } + IConfiguration Configuration { get; } } diff --git a/src/Serval/src/Serval.Shared/Configuration/IServalBuilderExtensions.cs b/src/Serval/src/Serval.Shared/Configuration/IServalBuilderExtensions.cs index 2f226ab4..4a611f25 100644 --- a/src/Serval/src/Serval.Shared/Configuration/IServalBuilderExtensions.cs +++ b/src/Serval/src/Serval.Shared/Configuration/IServalBuilderExtensions.cs @@ -2,27 +2,12 @@ public static class IServalBuilderExtensions { - public static IServalBuilder AddDataFileOptions( - this IServalBuilder builder, - Action configureOptions - ) - { - builder.Services.Configure(configureOptions); - return builder; - } - public static IServalBuilder AddDataFileOptions(this IServalBuilder builder, IConfiguration config) { builder.Services.Configure(config); return builder; } - public static IServalBuilder AddApiOptions(this IServalBuilder builder, Action configureOptions) - { - builder.Services.Configure(configureOptions); - return builder; - } - public static IServalBuilder AddApiOptions(this IServalBuilder builder, IConfiguration config) { builder.Services.Configure(config); @@ -43,7 +28,7 @@ public static IServalBuilder AddMongoDataAccess( Action configure ) { - string? mongoConnectionString = builder.Configuration?.GetConnectionString("Mongo"); + string? 
mongoConnectionString = builder.Configuration.GetConnectionString("Mongo"); if (mongoConnectionString is null) throw new InvalidOperationException("Mongo connection string not configured"); builder.Services.AddMongoDataAccess(mongoConnectionString, "Serval", configure); diff --git a/src/Serval/src/Serval.Shared/Configuration/IServiceCollectionExtensions.cs b/src/Serval/src/Serval.Shared/Configuration/IServiceCollectionExtensions.cs index 2671ac40..3a7ce339 100644 --- a/src/Serval/src/Serval.Shared/Configuration/IServiceCollectionExtensions.cs +++ b/src/Serval/src/Serval.Shared/Configuration/IServiceCollectionExtensions.cs @@ -2,7 +2,7 @@ public static class IServiceCollectionExtensions { - public static IServalBuilder AddServal(this IServiceCollection services, IConfiguration? configuration = null) + public static IServalBuilder AddServal(this IServiceCollection services, IConfiguration configuration) { services.AddTransient(); services.AddTransient(); diff --git a/src/Serval/src/Serval.Shared/Configuration/ServalBuilder.cs b/src/Serval/src/Serval.Shared/Configuration/ServalBuilder.cs index b4fe3747..48c5123d 100644 --- a/src/Serval/src/Serval.Shared/Configuration/ServalBuilder.cs +++ b/src/Serval/src/Serval.Shared/Configuration/ServalBuilder.cs @@ -1,7 +1,7 @@ namespace Microsoft.Extensions.DependencyInjection; -internal class ServalBuilder(IServiceCollection services, IConfiguration? configuration) : IServalBuilder +internal class ServalBuilder(IServiceCollection services, IConfiguration configuration) : IServalBuilder { public IServiceCollection Services { get; } = services; - public IConfiguration? 
Configuration { get; } = configuration; + public IConfiguration Configuration { get; } = configuration; } diff --git a/src/Serval/src/Serval.Shared/Serval.Shared.csproj b/src/Serval/src/Serval.Shared/Serval.Shared.csproj index 5af835f5..0974a424 100644 --- a/src/Serval/src/Serval.Shared/Serval.Shared.csproj +++ b/src/Serval/src/Serval.Shared/Serval.Shared.csproj @@ -19,7 +19,7 @@ - + diff --git a/src/Serval/src/Serval.Translation/Configuration/IServalBuilderExtensions.cs b/src/Serval/src/Serval.Translation/Configuration/IServalBuilderExtensions.cs index 190d627f..4e329863 100644 --- a/src/Serval/src/Serval.Translation/Configuration/IServalBuilderExtensions.cs +++ b/src/Serval/src/Serval.Translation/Configuration/IServalBuilderExtensions.cs @@ -5,30 +5,17 @@ namespace Microsoft.Extensions.DependencyInjection; public static class IServalBuilderExtensions { - public static IServalBuilder AddTranslation( - this IServalBuilder builder, - Action? configure = null - ) + public static IServalBuilder AddTranslation(this IServalBuilder builder) { - if (builder.Configuration is null) - { - builder.AddApiOptions(o => { }); - builder.AddDataFileOptions(o => { }); - } - else - { - builder.AddApiOptions(builder.Configuration.GetSection(ApiOptions.Key)); - builder.AddDataFileOptions(builder.Configuration.GetSection(DataFileOptions.Key)); - } + builder.AddApiOptions(builder.Configuration.GetSection(ApiOptions.Key)); + builder.AddDataFileOptions(builder.Configuration.GetSection(DataFileOptions.Key)); builder.Services.AddScoped(); builder.Services.AddScoped(); builder.Services.AddScoped(); var translationOptions = new TranslationOptions(); - builder.Configuration?.GetSection(TranslationOptions.Key).Bind(translationOptions); - if (configure is not null) - configure(translationOptions); + builder.Configuration.GetSection(TranslationOptions.Key).Bind(translationOptions); foreach (EngineInfo engine in translationOptions.Engines) { diff --git 
a/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusConfigDto.cs b/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusConfigDto.cs index a88ebe3b..58756e3a 100644 --- a/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusConfigDto.cs +++ b/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusConfigDto.cs @@ -2,10 +2,13 @@ public record PretranslateCorpusConfigDto { + [Obsolete] public string? CorpusId { get; init; } + [Obsolete] public IReadOnlyList? TextIds { get; init; } + [Obsolete] public string? ScriptureRange { get; init; } public string? ParallelCorpusId { get; init; } diff --git a/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusDto.cs b/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusDto.cs index 9aa6f939..14fde716 100644 --- a/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusDto.cs +++ b/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusDto.cs @@ -2,10 +2,13 @@ public record PretranslateCorpusDto { + [Obsolete] public ResourceLinkDto? Corpus { get; init; } + [Obsolete] public IReadOnlyList? TextIds { get; init; } + [Obsolete] public string? ScriptureRange { get; init; } public ResourceLinkDto? ParallelCorpus { get; init; } diff --git a/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusConfigDto.cs b/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusConfigDto.cs index c8161a5f..a70bf5ab 100644 --- a/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusConfigDto.cs +++ b/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusConfigDto.cs @@ -2,8 +2,13 @@ namespace Serval.Translation.Contracts; public record TrainingCorpusConfigDto { + [Obsolete] public string? CorpusId { get; init; } + + [Obsolete] public IReadOnlyList? TextIds { get; init; } + + [Obsolete] public string? ScriptureRange { get; init; } public string? 
ParallelCorpusId { get; init; } diff --git a/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusDto.cs b/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusDto.cs index f734f43b..f958a07b 100644 --- a/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusDto.cs +++ b/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusDto.cs @@ -2,8 +2,13 @@ namespace Serval.Translation.Contracts; public record TrainingCorpusDto { + [Obsolete] public ResourceLinkDto? Corpus { get; init; } + + [Obsolete] public IReadOnlyList? TextIds { get; init; } + + [Obsolete] public string? ScriptureRange { get; init; } public ResourceLinkDto? ParallelCorpus { get; init; } diff --git a/src/Serval/src/Serval.Translation/Contracts/TranslationBuildDto.cs b/src/Serval/src/Serval.Translation/Contracts/TranslationBuildDto.cs index d58339ca..91df3ac7 100644 --- a/src/Serval/src/Serval.Translation/Contracts/TranslationBuildDto.cs +++ b/src/Serval/src/Serval.Translation/Contracts/TranslationBuildDto.cs @@ -28,5 +28,5 @@ public record TranslationBuildDto /// public object? Options { get; init; } public string? DeploymentVersion { get; init; } - public Dictionary[]? Statistics { get; init; } + public IReadOnlyDictionary? 
ExecutionData { get; init; } } diff --git a/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs b/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs index c0e3d13d..bb9dcb73 100644 --- a/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs +++ b/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs @@ -1,5 +1,7 @@ namespace Serval.Translation.Controllers; +#pragma warning disable CS0612 // Type or member is obsolete + [ApiVersion(1.0)] [Route("api/v{version:apiVersion}/translation/engines")] [OpenApiTag("Translation Engines")] @@ -23,6 +25,7 @@ ILogger logger private readonly IOptionsMonitor _apiOptions = apiOptions; private readonly IUrlService _urlService = urlService; private readonly ILogger _logger = logger; + private readonly IConfiguration _configuration = configuration; /// /// Get all translation engines @@ -314,7 +317,7 @@ await _engineService.TrainSegmentPairAsync( } /// - /// Add a corpus to a translation engine + /// Add a corpus to a translation engine (obsolete - use parallel corpora instead) /// /// /// ## Parameters @@ -345,6 +348,7 @@ await _engineService.TrainSegmentPairAsync( /// The authenticated client cannot perform the operation or does not own the translation engine. /// The engine does not exist. /// A necessary service is currently unavailable. Check `/health` for more details. + [Obsolete("This endpoint is obsolete. Use parallel corpora instead.")] [Authorize(Scopes.UpdateTranslationEngines)] [HttpPost("{id}/corpora")] [ProducesResponseType(StatusCodes.Status201Created)] @@ -370,7 +374,7 @@ CancellationToken cancellationToken } /// - /// Update a corpus with a new set of files + /// Update a corpus with a new set of files (obsolete - use parallel corpora instead) /// /// /// See posting a new corpus for details of use. Will completely replace corpus' file associations. 
@@ -387,6 +391,7 @@ CancellationToken cancellationToken /// The authenticated client cannot perform the operation or does not own the translation engine. /// The engine or corpus does not exist. /// A necessary service is currently unavailable. Check `/health` for more details. + [Obsolete("This endpoint is obsolete. Use parallel corpora instead.")] [Authorize(Scopes.UpdateTranslationEngines)] [HttpPatch("{id}/corpora/{corpusId}")] [ProducesResponseType(StatusCodes.Status200OK)] @@ -419,7 +424,7 @@ corpusConfig.TargetFiles is null } /// - /// Get all corpora for a translation engine + /// Get all corpora for a translation engine (obsolete - use parallel corpora instead) /// /// The translation engine id /// @@ -428,6 +433,7 @@ corpusConfig.TargetFiles is null /// The authenticated client cannot perform the operation or does not own the translation engine /// The engine does not exist /// A necessary service is currently unavailable. Check `/health` for more details. + [Obsolete("This endpoint is obsolete. Use parallel corpora instead.")] [Authorize(Scopes.ReadTranslationEngines)] [HttpGet("{id}/corpora")] [ProducesResponseType(StatusCodes.Status200OK)] @@ -446,7 +452,7 @@ CancellationToken cancellationToken } /// - /// Get the configuration of a corpus for a translation engine + /// Get the configuration of a corpus for a translation engine (obsolete - use parallel corpora instead) /// /// The translation engine id /// The corpus id @@ -456,6 +462,7 @@ CancellationToken cancellationToken /// The authenticated client cannot perform the operation or does not own the translation engine. /// The engine or corpus does not exist. /// A necessary service is currently unavailable. Check `/health` for more details. + [Obsolete("This endpoint is obsolete. 
Use parallel corpora instead.")] [Authorize(Scopes.ReadTranslationEngines)] [HttpGet("{id}/corpora/{corpusId}", Name = Endpoints.GetTranslationCorpus)] [ProducesResponseType(StatusCodes.Status200OK)] @@ -699,7 +706,7 @@ CancellationToken cancellationToken } /// - /// Get all pretranslations in a corpus of a translation engine + /// Get all pretranslations in a corpus or parallel corpus of a translation engine /// /// /// Pretranslations are arranged in a list of dictionaries with the following fields per pretranslation: @@ -713,7 +720,7 @@ CancellationToken cancellationToken /// Only pretranslations for the most recent successful build of the engine are returned. /// /// The translation engine id - /// The corpus id + /// The corpus id or parallel corpus id /// The text id (optional) /// /// The pretranslations @@ -739,7 +746,7 @@ CancellationToken cancellationToken { Engine engine = await _engineService.GetAsync(id, cancellationToken); await AuthorizeAsync(engine); - if (!engine.Corpora.Any(c => c.Id == corpusId)) + if (!engine.Corpora.Any(c => c.Id == corpusId) && !engine.ParallelCorpora.Any(c => c.Id == corpusId)) return NotFound(); if (engine.ModelRevision == 0) return Conflict(); @@ -762,7 +769,7 @@ CancellationToken cancellationToken } /// - /// Get all pretranslations for the specified text in a corpus of a translation engine + /// Get all pretranslations for the specified text in a corpus or parallel corpus of a translation engine /// /// /// Pretranslations are arranged in a list of dictionaries with the following fields per pretranslation: @@ -775,7 +782,7 @@ CancellationToken cancellationToken /// Only pretranslations for the most recent successful build of the engine are returned. 
/// /// The translation engine id - /// The corpus id + /// The corpus id or parallel corpus id /// The text id /// /// The pretranslations @@ -801,7 +808,7 @@ CancellationToken cancellationToken { Engine engine = await _engineService.GetAsync(id, cancellationToken); await AuthorizeAsync(engine); - if (!engine.Corpora.Any(c => c.Id == corpusId)) + if (!engine.Corpora.Any(c => c.Id == corpusId) && !engine.ParallelCorpora.Any(c => c.Id == corpusId)) return NotFound(); if (engine.ModelRevision == 0) return Conflict(); @@ -842,7 +849,7 @@ CancellationToken cancellationToken /// Both scripture and non-scripture text in the USFM is parsed and grouped according to [this wiki](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation). /// /// The translation engine id - /// The corpus id + /// The corpus id or parallel corpus id /// The text id /// The source[s] of the data to populate the USFM file with. /// @@ -876,7 +883,7 @@ CancellationToken cancellationToken { Engine engine = await _engineService.GetAsync(id, cancellationToken); await AuthorizeAsync(engine); - if (!engine.Corpora.Any(c => c.Id == corpusId)) + if (!engine.Corpora.Any(c => c.Id == corpusId) && !engine.ParallelCorpora.Any(c => c.Id == corpusId)) return NotFound(); if (engine.ModelRevision == 0) return Conflict(); @@ -991,10 +998,21 @@ CancellationToken cancellationToken /// Starts a build job for a translation engine. /// /// - /// Specify the corpora and textIds/scriptureRanges within those corpora to train on. Only one type of corpus may be used: either corpora (see /translation/engines/{id}/corpora) or parallel corpora (see /translation/engines/{id}/parallel-corpora). If no "trainOn" field is provided, all corpora will be used. - /// Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training. - /// Filters can also be supplied via scriptureRange parameter as ranges of biblical text. 
See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range) - /// All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. + /// Specify the corpora and textIds/scriptureRanges within those corpora to train on. Only one type of corpus may be used: either (legacy) corpora (see /translation/engines/{id}/corpora) or parallel corpora (see /translation/engines/{id}/parallel-corpora). + /// Specifying a corpus: + /// * A (legacy) corpus is selected by specifying CorpusId and a parallel corpus is selected by specifying ParallelCorpusId. + /// * A parallel corpus can be further filtered by specifying particular CorpusIds in SourceFilters or TargetFilters. + /// + /// Filtering by textID or chapter: + /// * Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training. + /// * Filters can also be supplied via scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range) + /// * All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. + /// + /// Filter - train on all or none + /// * If trainOn or pretranslate is not provided, all corpora will be used for training or pretranslation respectively + /// * If a corpus is selected for training or pretranslation and neither scriptureRange nor textIds are defined, all of the selected corpus will be used. + /// * If a corpus is selected for training or pretranslation and an empty scriptureRange or textIds is defined, none of the selected corpus will be used. 
+ /// * If a corpus is selected for training or pretranslation but no further filters are provided, all selected corpora will be used for training or pretranslation respectively. /// /// Specify the corpora and textIds/scriptureRanges within those corpora to pretranslate. When a corpus is selected for pretranslation, /// the following text will be pretranslated: @@ -1036,7 +1054,7 @@ public async Task> StartBuildAsync( CancellationToken cancellationToken ) { - string deploymentVersion = configuration.GetValue("deploymentVersion") ?? "Unknown"; + string deploymentVersion = _configuration.GetValue("deploymentVersion") ?? "Unknown"; Engine engine = await _engineService.GetAsync(id, cancellationToken); await AuthorizeAsync(engine); @@ -1366,6 +1384,24 @@ private static Build Map(Engine engine, TranslationBuildConfigDto source, string $"The parallel corpus {pcc.ParallelCorpusId} is not valid: This parallel corpus does not exist for engine {engine.Id}." ); } + if ( + pcc.SourceFilters != null + && pcc.SourceFilters.Count > 0 + && ( + pcc.SourceFilters.Select(sf => sf.CorpusId).Distinct().Count() > 1 + || pcc.SourceFilters[0].CorpusId + != engine + .ParallelCorpora.Where(pc => pc.Id == pcc.ParallelCorpusId) + .First() + .SourceCorpora[0] + .Id + ) + ) + { + throw new InvalidOperationException( + $"Only the first source corpus in a parallel corpus may be filtered for pretranslation." 
+ ); + } pretranslateCorpora.Add( new PretranslateCorpus { @@ -1512,7 +1548,7 @@ private TranslationBuildDto Map(Build source) DateFinished = source.DateFinished, Options = source.Options, DeploymentVersion = source.DeploymentVersion, - Statistics = source.Statistics + ExecutionData = source.ExecutionData }; } @@ -1731,3 +1767,5 @@ private static ModelDownloadUrlDto Map(ModelDownloadUrl source) }; } } + +#pragma warning restore CS0612 // Type or member is obsolete diff --git a/src/Serval/src/Serval.Translation/Models/Build.cs b/src/Serval/src/Serval.Translation/Models/Build.cs index f1773797..879b6f63 100644 --- a/src/Serval/src/Serval.Translation/Models/Build.cs +++ b/src/Serval/src/Serval.Translation/Models/Build.cs @@ -17,5 +17,5 @@ public record Build : IEntity public DateTime? DateFinished { get; init; } public IReadOnlyDictionary? Options { get; init; } public string? DeploymentVersion { get; init; } - public Dictionary[] Statistics { get; init; } = []; + public IReadOnlyDictionary ExecutionData { get; init; } = new Dictionary(); } diff --git a/src/Serval/src/Serval.Translation/Services/EngineService.cs b/src/Serval/src/Serval.Translation/Services/EngineService.cs index 5b3d08ff..443b2d23 100644 --- a/src/Serval/src/Serval.Translation/Services/EngineService.cs +++ b/src/Serval/src/Serval.Translation/Services/EngineService.cs @@ -1,4 +1,4 @@ -using MassTransit.Mediator; +using MassTransit.Mediator; using Serval.Translation.V1; namespace Serval.Translation.Services; @@ -227,8 +227,19 @@ public async Task StartBuildAsync(Build build, CancellationToken cancellationTok StartBuildRequest request; if (engine.ParallelCorpora.Any()) { - var trainOn = build.TrainOn?.ToDictionary(c => c.ParallelCorpusRef!); - var pretranslate = build.Pretranslate?.ToDictionary(c => c.ParallelCorpusRef!); + Dictionary? trainOn = build.TrainOn?.ToDictionary(c => c.ParallelCorpusRef!); + Dictionary? pretranslate = build.Pretranslate?.ToDictionary(c => + c.ParallelCorpusRef! 
+ ); + IReadOnlyList parallelCorpora = engine + .ParallelCorpora.Where(pc => + trainOn == null + || trainOn.ContainsKey(pc.Id) + || pretranslate == null + || pretranslate.ContainsKey(pc.Id) + ) + .ToList(); + request = new StartBuildRequest { EngineType = engine.Type, @@ -236,16 +247,32 @@ public async Task StartBuildAsync(Build build, CancellationToken cancellationTok BuildId = build.Id, Corpora = { - engine.ParallelCorpora.Select(c => - Map(c, trainOn?.GetValueOrDefault(c.Id), pretranslate?.GetValueOrDefault(c.Id)) + parallelCorpora.Select(c => + Map( + c, + trainOn?.GetValueOrDefault(c.Id), + pretranslate?.GetValueOrDefault(c.Id), + trainOn is null, + pretranslate is null + ) ) } }; } else { - var pretranslate = build.Pretranslate?.ToDictionary(c => c.CorpusRef!); - var trainOn = build.TrainOn?.ToDictionary(c => c.CorpusRef!); + Dictionary? trainOn = build.TrainOn?.ToDictionary(c => c.CorpusRef!); + Dictionary? pretranslate = build.Pretranslate?.ToDictionary(c => + c.CorpusRef! + ); + IReadOnlyList corpora = engine + .Corpora.Where(c => + trainOn == null + || trainOn.ContainsKey(c.Id) + || pretranslate == null + || pretranslate.ContainsKey(c.Id) + ) + .ToList(); request = new StartBuildRequest { @@ -254,8 +281,14 @@ public async Task StartBuildAsync(Build build, CancellationToken cancellationTok BuildId = build.Id, Corpora = { - engine.Corpora.Select(c => - Map(c, trainOn?.GetValueOrDefault(c.Id), pretranslate?.GetValueOrDefault(c.Id)) + corpora.Select(c => + Map( + c, + trainOn?.GetValueOrDefault(c.Id), + pretranslate?.GetValueOrDefault(c.Id), + trainOn is null, + pretranslate is null + ) ) } }; @@ -592,7 +625,13 @@ private Models.WordGraphArc Map(V1.WordGraphArc source) }; } - private V1.ParallelCorpus Map(Corpus source, TrainingCorpus? trainingCorpus, PretranslateCorpus? pretranslateCorpus) + private V1.ParallelCorpus Map( + Corpus source, + TrainingCorpus? trainingCorpus, + PretranslateCorpus? 
pretranslateCorpus, + bool trainOnAllCorpora, + bool pretranslateOnAllCorpora + ) { IEnumerable sourceFiles = source.SourceFiles.Select(Map); IEnumerable targetFiles = source.TargetFiles.Select(Map); @@ -601,12 +640,15 @@ private V1.ParallelCorpus Map(Corpus source, TrainingCorpus? trainingCorpus, Pre V1.MonolingualCorpus targetCorpus = new() { Language = source.TargetLanguage, Files = { source.TargetFiles.Select(Map) } }; - if (trainingCorpus is null || (trainingCorpus.TextIds is null && trainingCorpus.ScriptureRange is null)) + if ( + trainOnAllCorpora + || (trainingCorpus is not null && trainingCorpus.TextIds is null && trainingCorpus.ScriptureRange is null) + ) { sourceCorpus.TrainOnAll = true; targetCorpus.TrainOnAll = true; } - else + else if (trainingCorpus is not null) { if (trainingCorpus.TextIds is not null && trainingCorpus.ScriptureRange is not null) { @@ -642,14 +684,18 @@ private V1.ParallelCorpus Map(Corpus source, TrainingCorpus? trainingCorpus, Pre } } if ( - pretranslateCorpus is null - || (pretranslateCorpus.TextIds is null && pretranslateCorpus.ScriptureRange is null) + pretranslateOnAllCorpora + || ( + pretranslateCorpus is not null + && pretranslateCorpus.TextIds is null + && pretranslateCorpus.ScriptureRange is null + ) ) { sourceCorpus.PretranslateAll = true; targetCorpus.PretranslateAll = true; } - else + else if (pretranslateCorpus is not null) { if (pretranslateCorpus.TextIds is not null && pretranslateCorpus.ScriptureRange is not null) { @@ -692,7 +738,9 @@ pretranslateCorpus is null private V1.ParallelCorpus Map( Models.ParallelCorpus source, TrainingCorpus? trainingCorpus, - PretranslateCorpus? pretranslateCorpus + PretranslateCorpus? pretranslateCorpus, + bool trainOnAllCorpora, + bool pretranslateOnAllCorpora ) { string? referenceFileLocation = @@ -700,6 +748,15 @@ private V1.ParallelCorpus Map( ? 
Map(source.TargetCorpora[0].Files[0]).Location : null; + bool trainOnAllSources = + trainOnAllCorpora || (trainingCorpus is not null && trainingCorpus.SourceFilters is null); + bool pretranslateAllSources = + pretranslateOnAllCorpora || (pretranslateCorpus is not null && pretranslateCorpus.SourceFilters is null); + + bool trainOnAllTargets = + trainOnAllCorpora || (trainingCorpus is not null && trainingCorpus.TargetFilters is null); + bool pretranslateAllTargets = pretranslateOnAllCorpora || pretranslateCorpus is not null; // there is no pretranslate Target filter. + return new V1.ParallelCorpus { Id = source.Id, @@ -710,7 +767,9 @@ private V1.ParallelCorpus Map( sc, trainingCorpus?.SourceFilters?.Where(sf => sf.CorpusRef == sc.Id).FirstOrDefault(), pretranslateCorpus?.SourceFilters?.Where(sf => sf.CorpusRef == sc.Id).FirstOrDefault(), - referenceFileLocation + referenceFileLocation, + trainOnAllSources, + pretranslateAllSources ) ) }, @@ -721,7 +780,9 @@ private V1.ParallelCorpus Map( tc, trainingCorpus?.TargetFilters?.Where(sf => sf.CorpusRef == tc.Id).FirstOrDefault(), null, - referenceFileLocation + referenceFileLocation, + trainOnAllTargets, + pretranslateAllTargets ) ) } @@ -729,10 +790,12 @@ private V1.ParallelCorpus Map( } private V1.MonolingualCorpus Map( - Models.MonolingualCorpus source, + Models.MonolingualCorpus inputCorpus, ParallelCorpusFilter? trainingFilter, ParallelCorpusFilter? pretranslateFilter, - string? referenceFileLocation + string? referenceFileLocation, + bool trainOnAll, + bool pretranslateOnAll ) { Dictionary? 
trainOnChapters = null; @@ -761,7 +824,7 @@ pretranslateFilter is not null && referenceFileLocation is not null ) { - GetChapters(referenceFileLocation, pretranslateFilter.ScriptureRange) + pretranslateChapters = GetChapters(referenceFileLocation, pretranslateFilter.ScriptureRange) .Select( (kvp) => { @@ -773,41 +836,48 @@ pretranslateFilter is not null .ToDictionary(); } - var corpus = new V1.MonolingualCorpus + var returnCorpus = new V1.MonolingualCorpus { - Id = source.Id, - Language = source.Language, - Files = { source.Files.Select(Map) } + Id = inputCorpus.Id, + Language = inputCorpus.Language, + Files = { inputCorpus.Files.Select(Map) } }; - if (trainingFilter is null || (trainingFilter.TextIds is null && trainingFilter.ScriptureRange is null)) + if ( + trainOnAll + || (trainingFilter is not null && trainingFilter.TextIds is null && trainingFilter.ScriptureRange is null) + ) { - corpus.TrainOnAll = true; + returnCorpus.TrainOnAll = true; } else { if (trainOnChapters is not null) - corpus.TrainOnChapters.Add(trainOnChapters); + returnCorpus.TrainOnChapters.Add(trainOnChapters); if (trainingFilter?.TextIds is not null) - corpus.TrainOnTextIds.Add(trainingFilter.TextIds); + returnCorpus.TrainOnTextIds.Add(trainingFilter.TextIds); } if ( - pretranslateFilter is null - || (pretranslateFilter.TextIds is null && pretranslateFilter.ScriptureRange is null) + pretranslateOnAll + || ( + pretranslateFilter is not null + && pretranslateFilter.TextIds is null + && pretranslateFilter.ScriptureRange is null + ) ) { - corpus.PretranslateAll = true; + returnCorpus.PretranslateAll = true; } else { if (pretranslateChapters is not null) - corpus.PretranslateChapters.Add(pretranslateChapters); + returnCorpus.PretranslateChapters.Add(pretranslateChapters); if (pretranslateFilter?.TextIds is not null) - corpus.PretranslateTextIds.Add(pretranslateFilter.TextIds); + returnCorpus.PretranslateTextIds.Add(pretranslateFilter.TextIds); } - return corpus; + return returnCorpus; } private 
V1.CorpusFile Map(Models.CorpusFile source) diff --git a/src/Serval/src/Serval.Translation/Services/PretranslationService.cs b/src/Serval/src/Serval.Translation/Services/PretranslationService.cs index 48e89b91..1bf552fb 100644 --- a/src/Serval/src/Serval.Translation/Services/PretranslationService.cs +++ b/src/Serval/src/Serval.Translation/Services/PretranslationService.cs @@ -41,11 +41,24 @@ public async Task GetUsfmAsync( { Engine? engine = await _engines.GetAsync(engineId, cancellationToken); Corpus? corpus = engine?.Corpora.SingleOrDefault(c => c.Id == corpusId); - if (corpus is null) - throw new EntityNotFoundException($"Could not find the Corpus '{corpusId}' in Engine '{engineId}'."); + ParallelCorpus? parallelCorpus = engine?.ParallelCorpora.SingleOrDefault(c => c.Id == corpusId); - CorpusFile sourceFile = corpus.SourceFiles[0]; - CorpusFile targetFile = corpus.TargetFiles[0]; + CorpusFile sourceFile; + CorpusFile targetFile; + if (corpus is not null) + { + sourceFile = corpus.SourceFiles[0]; + targetFile = corpus.TargetFiles[0]; + } + else if (parallelCorpus is not null) + { + sourceFile = parallelCorpus.SourceCorpora[0].Files[0]; + targetFile = parallelCorpus.TargetCorpora[0].Files[0]; + } + else + { + throw new EntityNotFoundException($"Could not find the Corpus '{corpusId}' in Engine '{engineId}'."); + } if (sourceFile.Format is not FileFormat.Paratext || targetFile.Format is not FileFormat.Paratext) throw new InvalidOperationException("USFM format is not valid for non-Scripture corpora."); diff --git a/src/Serval/src/Serval.Translation/Services/TranslationPlatformServiceV1.cs b/src/Serval/src/Serval.Translation/Services/TranslationPlatformServiceV1.cs index a8f82edd..6df92248 100644 --- a/src/Serval/src/Serval.Translation/Services/TranslationPlatformServiceV1.cs +++ b/src/Serval/src/Serval.Translation/Services/TranslationPlatformServiceV1.cs @@ -1,4 +1,5 @@ -using Google.Protobuf.WellKnownTypes; +using System.Collections.ObjectModel; +using 
Google.Protobuf.WellKnownTypes; using Serval.Translation.V1; namespace Serval.Translation.Services; @@ -265,8 +266,8 @@ await _builds.UpdateAsync( return Empty; } - public override async Task UpdateBuildStatistics( - UpdateBuildStatisticsRequest request, + public override async Task UpdateBuildExecutionData( + UpdateBuildExecutionDataRequest request, ServerCallContext context ) { @@ -276,17 +277,16 @@ ServerCallContext context throw new RpcException(new Status(StatusCode.NotFound, "Build not found.")); } - var newStatistics = new Dictionary(); - foreach (var entry in request.Statistics) + var updatedExecutionData = new Dictionary(build.ExecutionData); + + foreach (var entry in request.ExecutionData) { - newStatistics[entry.Key] = entry.Value; + updatedExecutionData[entry.Key] = entry.Value; } - var updatedStatistics = build.Statistics.Concat(new[] { newStatistics }).ToArray(); - await _builds.UpdateAsync( b => b.Id == request.BuildId, - u => u.Set(b => b.Statistics, updatedStatistics), + u => u.Set(b => b.ExecutionData, new ReadOnlyDictionary(updatedExecutionData)), cancellationToken: context.CancellationToken ); diff --git a/src/Serval/src/Serval.Webhooks/Configuration/IServalBuilderExtensions.cs b/src/Serval/src/Serval.Webhooks/Configuration/IServalBuilderExtensions.cs index 129804e3..383e5baf 100644 --- a/src/Serval/src/Serval.Webhooks/Configuration/IServalBuilderExtensions.cs +++ b/src/Serval/src/Serval.Webhooks/Configuration/IServalBuilderExtensions.cs @@ -4,11 +4,7 @@ public static class IServalBuilderExtensions { public static IServalBuilder AddWebhooks(this IServalBuilder builder) { - builder - .Services.AddHttpClient() - .AddTransientHttpErrorPolicy(b => - b.WaitAndRetryAsync(3, retryAttempt => TimeSpan.FromSeconds(Math.Pow(2, retryAttempt))) - ); + builder.Services.AddHttpClient(); builder.Services.AddScoped(); return builder; } diff --git a/src/Serval/src/Serval.Webhooks/Serval.Webhooks.csproj b/src/Serval/src/Serval.Webhooks/Serval.Webhooks.csproj index 
44f1ef4d..4f9fa6d8 100644 --- a/src/Serval/src/Serval.Webhooks/Serval.Webhooks.csproj +++ b/src/Serval/src/Serval.Webhooks/Serval.Webhooks.csproj @@ -14,7 +14,6 @@ - diff --git a/src/Serval/src/Serval.Webhooks/Services/WebhookJob.cs b/src/Serval/src/Serval.Webhooks/Services/WebhookJob.cs index faee17d4..384ba6be 100644 --- a/src/Serval/src/Serval.Webhooks/Services/WebhookJob.cs +++ b/src/Serval/src/Serval.Webhooks/Services/WebhookJob.cs @@ -6,6 +6,32 @@ public class WebhookJob(IRepository hooks, HttpClient httpClient, IOpti private readonly HttpClient _httpClient = httpClient; private readonly JsonOptions _jsonOptions = jsonOptions.Value; + [AutomaticRetry( + Attempts = 20, + DelaysInSeconds = new[] + { + 1, + 2, + 4, + 8, + 16, + 32, + 64, + 128, + 256, + 512, + 1024, + 2048, + 2048, + 2048, + 2048, + 2048, + 2048, + 2048, + 2048 + }, + LogEvents = true + )] public async Task RunAsync( WebhookEvent webhookEvent, string owner, diff --git a/src/Serval/src/Serval.Webhooks/Usings.cs b/src/Serval/src/Serval.Webhooks/Usings.cs index f68d9a61..39f9b6a5 100644 --- a/src/Serval/src/Serval.Webhooks/Usings.cs +++ b/src/Serval/src/Serval.Webhooks/Usings.cs @@ -11,7 +11,6 @@ global using Microsoft.AspNetCore.Mvc; global using Microsoft.AspNetCore.Routing; global using Microsoft.Extensions.Options; -global using Polly; global using Serval.Shared.Contracts; global using Serval.Shared.Controllers; global using Serval.Shared.Models; diff --git a/src/Serval/test/Serval.ApiServer.IntegrationTests/TranslationEngineTests.cs b/src/Serval/test/Serval.ApiServer.IntegrationTests/TranslationEngineTests.cs index b4c13c7d..d66b3557 100644 --- a/src/Serval/test/Serval.ApiServer.IntegrationTests/TranslationEngineTests.cs +++ b/src/Serval/test/Serval.ApiServer.IntegrationTests/TranslationEngineTests.cs @@ -5,6 +5,8 @@ namespace Serval.ApiServer; +#pragma warning disable CS0612 // Type or member is obsolete + [TestFixture] [Category("Integration")] public class TranslationEngineTests @@ -28,7 
+30,15 @@ public class TranslationEngineTests new() { Name = "TestCorpus", - SourceCorpusIds = [SOURCE_CORPUS_ID], + SourceCorpusIds = [SOURCE_CORPUS_ID_1], + TargetCorpusIds = [TARGET_CORPUS_ID], + }; + + private static readonly TranslationParallelCorpusConfig TestMixedParallelCorpusConfig = + new() + { + Name = "TestCorpus", + SourceCorpusIds = [SOURCE_CORPUS_ID_1, SOURCE_CORPUS_ID_2], TargetCorpusIds = [TARGET_CORPUS_ID], }; private static readonly TranslationCorpusConfig TestCorpusConfigNonEcho = @@ -70,8 +80,9 @@ public class TranslationEngineTests private const string FILE3_FILENAME = "file_c"; private const string FILE4_ID = "f00000000000000000000004"; private const string FILE4_FILENAME = "file_d"; - private const string SOURCE_CORPUS_ID = "cc0000000000000000000001"; - private const string TARGET_CORPUS_ID = "cc0000000000000000000002"; + private const string SOURCE_CORPUS_ID_1 = "cc0000000000000000000001"; + private const string SOURCE_CORPUS_ID_2 = "cc0000000000000000000002"; + private const string TARGET_CORPUS_ID = "cc0000000000000000000003"; private const string DOES_NOT_EXIST_ENGINE_ID = "e00000000000000000000004"; private const string DOES_NOT_EXIST_CORPUS_ID = "c00000000000000000000001"; @@ -170,7 +181,14 @@ public async Task SetUp() var srcCorpus = new DataFiles.Models.Corpus { - Id = SOURCE_CORPUS_ID, + Id = SOURCE_CORPUS_ID_1, + Language = "en", + Owner = "client1", + Files = [new() { File = srcFile, TextId = "all" }] + }; + var srcCorpus2 = new DataFiles.Models.Corpus + { + Id = SOURCE_CORPUS_ID_2, Language = "en", Owner = "client1", Files = [new() { File = srcFile, TextId = "all" }] @@ -182,7 +200,7 @@ public async Task SetUp() Owner = "client1", Files = [new() { File = trgFile, TextId = "all" }] }; - await _env.Corpora.InsertAllAsync([srcCorpus, trgCorpus]); + await _env.Corpora.InsertAllAsync([srcCorpus, srcCorpus2, trgCorpus]); } [Test] @@ -813,7 +831,7 @@ public async Task AddParallelCorpusToEngineByIdAsync() ); Assert.Multiple(() => { - 
Assert.That(result.SourceCorpora.First().Id, Is.EqualTo(SOURCE_CORPUS_ID)); + Assert.That(result.SourceCorpora.First().Id, Is.EqualTo(SOURCE_CORPUS_ID_1)); Assert.That(result.TargetCorpora.First().Id, Is.EqualTo(TARGET_CORPUS_ID)); }); Engine? engine = await _env.Engines.GetAsync(ECHO_ENGINE1_ID); @@ -861,7 +879,7 @@ public async Task UpdateParallelCorpusByIdForEngineByIdAsync() ); var updateConfig = new TranslationParallelCorpusUpdateConfig { - SourceCorpusIds = [SOURCE_CORPUS_ID], + SourceCorpusIds = [SOURCE_CORPUS_ID_1], TargetCorpusIds = [TARGET_CORPUS_ID] }; await client.UpdateParallelCorpusAsync(ECHO_ENGINE1_ID, result.Id, updateConfig); @@ -883,7 +901,7 @@ public void UpdateParallelCorpusByIdForEngineById_NoSuchCorpus() { var updateConfig = new TranslationParallelCorpusUpdateConfig { - SourceCorpusIds = [SOURCE_CORPUS_ID], + SourceCorpusIds = [SOURCE_CORPUS_ID_1], TargetCorpusIds = [TARGET_CORPUS_ID] }; await client.UpdateParallelCorpusAsync(ECHO_ENGINE1_ID, DOES_NOT_EXIST_CORPUS_ID, updateConfig); @@ -900,10 +918,10 @@ public void UpdateParallelCorpusByIdForEngineById_NoSuchEngine() { var updateConfig = new TranslationParallelCorpusUpdateConfig { - SourceCorpusIds = [SOURCE_CORPUS_ID], + SourceCorpusIds = [SOURCE_CORPUS_ID_1], TargetCorpusIds = [TARGET_CORPUS_ID] }; - await client.UpdateParallelCorpusAsync(DOES_NOT_EXIST_ENGINE_ID, SOURCE_CORPUS_ID, updateConfig); + await client.UpdateParallelCorpusAsync(DOES_NOT_EXIST_ENGINE_ID, SOURCE_CORPUS_ID_1, updateConfig); }); Assert.That(ex?.StatusCode, Is.EqualTo(404)); } @@ -917,7 +935,7 @@ public void UpdateParallelCorpusByIdForEngineById_NotAuthorized() { var updateConfig = new TranslationParallelCorpusUpdateConfig { - SourceCorpusIds = [SOURCE_CORPUS_ID], + SourceCorpusIds = [SOURCE_CORPUS_ID_1], TargetCorpusIds = [TARGET_CORPUS_ID] }; await client.UpdateParallelCorpusAsync(ECHO_ENGINE1_ID, DOES_NOT_EXIST_CORPUS_ID, updateConfig); @@ -1010,7 +1028,7 @@ public void 
GetParallelCorpusByIdForEngineById_NoSuchEngine() { TranslationParallelCorpus result_afterAdd = await client.GetParallelCorpusAsync( DOES_NOT_EXIST_ENGINE_ID, - SOURCE_CORPUS_ID + SOURCE_CORPUS_ID_1 ); }); Assert.That(ex?.StatusCode, Is.EqualTo(404)); @@ -1085,7 +1103,7 @@ public void DeleteParallelCorpusByIdForEngineById_NoSuchEngine() ServalApiException? ex = Assert.ThrowsAsync(async () => { - await client.DeleteParallelCorpusAsync(DOES_NOT_EXIST_ENGINE_ID, SOURCE_CORPUS_ID); + await client.DeleteParallelCorpusAsync(DOES_NOT_EXIST_ENGINE_ID, SOURCE_CORPUS_ID_1); }); Assert.That(ex?.StatusCode, Is.EqualTo(404)); } @@ -1097,7 +1115,7 @@ public void DeleteParallelCorpusByIdForEngineById_NotAuthorized() ServalApiException? ex = Assert.ThrowsAsync(async () => { - await client.DeleteParallelCorpusAsync(ECHO_ENGINE1_ID, SOURCE_CORPUS_ID); + await client.DeleteParallelCorpusAsync(ECHO_ENGINE1_ID, SOURCE_CORPUS_ID_1); }); Assert.That(ex?.StatusCode, Is.EqualTo(403)); } @@ -1581,13 +1599,13 @@ public async Task StartBuild_ParallelCorpus() new() { ParallelCorpusId = addedCorpus.Id, - SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID, TextIds = ["all"] }] + SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID_1, TextIds = ["all"] }] }; TrainingCorpusConfig tcc = new() { ParallelCorpusId = addedCorpus.Id, - SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID, TextIds = ["all"] }], + SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID_1, TextIds = ["all"] }], TargetFilters = [new() { CorpusId = TARGET_CORPUS_ID, TextIds = ["all"] }] }; ; @@ -1628,13 +1646,13 @@ public async Task StartBuildAsync_ParallelCorpus() new() { ParallelCorpusId = addedCorpus.Id, - SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID, TextIds = ["all"] }] + SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID_1, TextIds = ["all"] }] }; TrainingCorpusConfig tcc = new() { ParallelCorpusId = addedCorpus.Id, - SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID, TextIds = ["all"] }], + SourceFilters = 
[new() { CorpusId = SOURCE_CORPUS_ID_1, TextIds = ["all"] }], TargetFilters = [new() { CorpusId = TARGET_CORPUS_ID, TextIds = ["all"] }] }; ; @@ -1669,12 +1687,12 @@ public async Task StartBuildAsync_Corpus_NoFilter() TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); TranslationCorpus addedCorpus = await client.AddCorpusAsync(NMT_ENGINE1_ID, TestCorpusConfig); PretranslateCorpusConfig ptcc = - new() { CorpusId = addedCorpus.Id, SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID }] }; + new() { CorpusId = addedCorpus.Id, SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID_1 }] }; TrainingCorpusConfig tcc = new() { CorpusId = addedCorpus.Id, - SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID }], + SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID_1 }], TargetFilters = [new() { CorpusId = TARGET_CORPUS_ID }] }; ; @@ -1720,12 +1738,12 @@ public async Task StartBuildAsync_ParallelCorpus_NoFilter() TestParallelCorpusConfig ); PretranslateCorpusConfig ptcc = - new() { ParallelCorpusId = addedCorpus.Id, SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID }] }; + new() { ParallelCorpusId = addedCorpus.Id, SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID_1 }] }; TrainingCorpusConfig tcc = new() { ParallelCorpusId = addedCorpus.Id, - SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID }], + SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID_1 }], TargetFilters = [new() { CorpusId = TARGET_CORPUS_ID }] }; ; @@ -1806,7 +1824,7 @@ public async Task StartBuildAsync_ParallelCorpus_PretranslateNoCorpusSpecified() TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); TranslationParallelCorpus addedParallelCorpus = await client.AddParallelCorpusAsync( NMT_ENGINE1_ID, - TestParallelCorpusConfig + TestMixedParallelCorpusConfig ); PretranslateCorpusConfig ptcc = new() { }; TrainingCorpusConfig tcc = new() { ParallelCorpusId = addedParallelCorpus.Id }; @@ -1818,6 +1836,32 @@ public async Task 
StartBuildAsync_ParallelCorpus_PretranslateNoCorpusSpecified() }); } + [Test] + public async Task StartBuildAsync_ParallelCorpus_PretranslateFilterOnMultipleSources() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + TranslationParallelCorpus addedParallelCorpus = await client.AddParallelCorpusAsync( + NMT_ENGINE1_ID, + TestParallelCorpusConfig + ); + PretranslateCorpusConfig ptcc = + new() + { + ParallelCorpusId = addedParallelCorpus.Id, + SourceFilters = + [ + new ParallelCorpusFilterConfig() { CorpusId = SOURCE_CORPUS_ID_1 }, + new ParallelCorpusFilterConfig() { CorpusId = SOURCE_CORPUS_ID_2 } + ] + }; + TrainingCorpusConfig tcc = new() { ParallelCorpusId = addedParallelCorpus.Id }; + TranslationBuildConfig tbc = new TranslationBuildConfig { Pretranslate = [ptcc], TrainOn = [tcc] }; + Assert.ThrowsAsync(async () => + { + await client.StartBuildAsync(NMT_ENGINE1_ID, tbc); + }); + } + [Test] public async Task StartBuildAsync_ParallelCorpus_TrainOnNoCorpusSpecified() { @@ -2337,3 +2381,5 @@ protected override void DisposeManagedResources() } } } + +#pragma warning restore CS0612 // Type or member is obsolete diff --git a/src/Serval/test/Serval.E2ETests/ServalApiTests.cs b/src/Serval/test/Serval.E2ETests/ServalApiTests.cs index 22bfb628..d2d0200e 100644 --- a/src/Serval/test/Serval.E2ETests/ServalApiTests.cs +++ b/src/Serval/test/Serval.E2ETests/ServalApiTests.cs @@ -1,5 +1,7 @@ namespace Serval.E2ETests; +#pragma warning disable CS0612 // Type or member is obsolete + [TestFixture] [Category("E2E")] public class ServalApiTests @@ -115,18 +117,32 @@ public async Task NmtBatch() string[] books = ["MAT.txt", "1JN.txt", "2JN.txt"]; string cId1 = await _helperClient.AddTextCorpusToEngineAsync(engineId, books, "es", "en", false); _helperClient.TranslationBuildConfig.TrainOn = [new() { CorpusId = cId1, TextIds = ["1JN.txt"] }]; - string cId2 = await _helperClient.AddTextCorpusToEngineAsync(engineId, ["3JN.txt"], "es", "en", true); + string 
cId2 = await _helperClient.AddTextCorpusToEngineAsync( + engineId, + ["2JN.txt", "3JN.txt"], + "es", + "en", + true + ); + _helperClient.TranslationBuildConfig.Pretranslate = [new() { CorpusId = cId2, TextIds = ["2JN.txt"] }]; await _helperClient.BuildEngineAsync(engineId); await Task.Delay(1000); - IList lTrans = await _helperClient.TranslationEnginesClient.GetAllPretranslationsAsync( + IList lTrans1 = await _helperClient.TranslationEnginesClient.GetAllPretranslationsAsync( + engineId, + cId1 + ); + Assert.That(lTrans1, Has.Count.EqualTo(0)); // should be nothing + IList lTrans2 = await _helperClient.TranslationEnginesClient.GetAllPretranslationsAsync( engineId, cId2 ); - Assert.That(lTrans, Has.Count.EqualTo(14)); TranslationBuild build = await _helperClient.TranslationEnginesClient.GetCurrentBuildAsync(engineId); - Assert.That(build.Statistics, Is.Not.Null); - Assert.That(build.Statistics, Contains.Key("trainCount")); + Assert.That(build.ExecutionData, Is.Not.Null); + Assert.That(build.ExecutionData, Contains.Key("trainCount")); + Assert.That(build.ExecutionData, Contains.Key("pretranslateCount")); + + Assert.That(lTrans2, Has.Count.EqualTo(13)); // just 2 John } [Test] @@ -135,14 +151,26 @@ public async Task NmtQueueMultiple() const int NUM_ENGINES = 10; const int NUM_WORKERS = 8; string[] engineIds = new string[NUM_ENGINES]; + string[] books = ["MAT.txt", "1JN.txt", "2JN.txt"]; + TranslationParallelCorpusConfig train_corpus = await _helperClient.MakeParallelTextCorpus( + books, + "es", + "en", + false + ); + TranslationParallelCorpusConfig pretranslate_corpus = await _helperClient.MakeParallelTextCorpus( + ["3JN.txt"], + "es", + "en", + true + ); for (int i = 0; i < NUM_ENGINES; i++) { _helperClient.InitTranslationBuildConfig(); engineIds[i] = await _helperClient.CreateNewEngineAsync("Nmt", "es", "en", $"NMT1_{i}"); string engineId = engineIds[i]; - string[] books = ["MAT.txt", "1JN.txt", "2JN.txt"]; - await 
_helperClient.AddParallelTextCorpusToEngineAsync(engineId, books, "es", "en", false); - await _helperClient.AddParallelTextCorpusToEngineAsync(engineId, ["3JN.txt"], "es", "en", true); + await _helperClient.AddParallelTextCorpusToEngineAsync(engineId, train_corpus, false); + await _helperClient.AddParallelTextCorpusToEngineAsync(engineId, pretranslate_corpus, true); await _helperClient.StartBuildAsync(engineId); //Ensure that tasks are enqueued roughly in order await Task.Delay(1_000); @@ -205,15 +233,27 @@ public async Task NmtLargeBatchAndDownload() TranslationEngine engine = await _helperClient.TranslationEnginesClient.GetAsync(engineId); Assert.That(engine.IsModelPersisted, Is.True); string[] books = ["bible_LARGEFILE.txt"]; - await _helperClient.AddTextCorpusToEngineAsync(engineId, books, "es", "en", false); - string cId = await _helperClient.AddTextCorpusToEngineAsync(engineId, ["3JN.txt"], "es", "en", true); + TranslationParallelCorpusConfig train_corpus = await _helperClient.MakeParallelTextCorpus( + books, + "es", + "en", + false + ); + TranslationParallelCorpusConfig pretranslate_corpus = await _helperClient.MakeParallelTextCorpus( + ["3JN.txt"], + "es", + "en", + true + ); + await _helperClient.AddParallelTextCorpusToEngineAsync(engineId, train_corpus, false); + string cId = await _helperClient.AddParallelTextCorpusToEngineAsync(engineId, pretranslate_corpus, true); await _helperClient.BuildEngineAsync(engineId); await Task.Delay(1000); IList lTrans = await _helperClient.TranslationEnginesClient.GetAllPretranslationsAsync( engineId, cId ); - TestContext.WriteLine(lTrans[0].Translation); + Assert.That(lTrans, Has.Count.EqualTo(14)); // Download the model from the s3 bucket ModelDownloadUrl url = await _helperClient.TranslationEnginesClient.GetModelDownloadUrlAsync(engineId); using Task s = new HttpClient().GetStreamAsync(url.Url); @@ -251,13 +291,8 @@ public async Task CircuitousRouteGetWordGraphAsync() Assert.That(ex.StatusCode, Is.EqualTo(409)); //Add 
corpus - string cId = await _helperClient.AddParallelTextCorpusToEngineAsync( - smtEngineId, - ["2JN.txt", "3JN.txt"], - "es", - "en", - false - ); + var corpus1 = await _helperClient.MakeParallelTextCorpus(["2JN.txt", "3JN.txt"], "es", "en", false); + string cId = await _helperClient.AddParallelTextCorpusToEngineAsync(smtEngineId, corpus1, false); //Build the new engine await _helperClient.BuildEngineAsync(smtEngineId); @@ -266,13 +301,8 @@ public async Task CircuitousRouteGetWordGraphAsync() await _helperClient.TranslationEnginesClient.DeleteParallelCorpusAsync(smtEngineId, cId); // Add corpus - await _helperClient.AddParallelTextCorpusToEngineAsync( - smtEngineId, - ["1JN.txt", "2JN.txt", "3JN.txt"], - "es", - "en", - false - ); + var corpus2 = await _helperClient.MakeParallelTextCorpus(["1JN.txt", "2JN.txt", "3JN.txt"], "es", "en", false); + await _helperClient.AddParallelTextCorpusToEngineAsync(smtEngineId, corpus2, false); //Build the new engine await _helperClient.BuildEngineAsync(smtEngineId); @@ -428,6 +458,12 @@ public async Task ParatextProjectNmtJobAsync() corpus.Id ); Assert.That(lTrans, Is.Not.Empty); + string usfm = await _helperClient.TranslationEnginesClient.GetPretranslatedUsfmAsync( + engineId, + corpus.Id, + "JHN" + ); + Assert.That(usfm, Does.Contain("\\v 1")); } [TearDown] @@ -442,3 +478,5 @@ public async Task OneTimeTearDown() await _helperClient.DisposeAsync(); } } + +#pragma warning restore CS0612 // Type or member is obsolete diff --git a/src/Serval/test/Serval.E2ETests/ServalClientHelper.cs b/src/Serval/test/Serval.E2ETests/ServalClientHelper.cs index d64fb15a..d489cf9a 100644 --- a/src/Serval/test/Serval.E2ETests/ServalClientHelper.cs +++ b/src/Serval/test/Serval.E2ETests/ServalClientHelper.cs @@ -1,5 +1,7 @@ namespace Serval.E2ETests; +#pragma warning disable CS0612 // Type or member is obsolete + public class ServalClientHelper : IAsyncDisposable { public DataFilesClient DataFilesClient { get; } @@ -231,8 +233,7 @@ bool pretranslate 
return response.Id; } - public async Task AddParallelTextCorpusToEngineAsync( - string engineId, + public async Task MakeParallelTextCorpus( string[] filesToAdd, string sourceLanguage, string targetLanguage, @@ -290,12 +291,21 @@ bool pretranslate TranslationParallelCorpusConfig parallelCorpusConfig = new() { SourceCorpusIds = { sourceCorpus.Id }, TargetCorpusIds = { targetCorpus.Id } }; + return parallelCorpusConfig; + } + + public async Task AddParallelTextCorpusToEngineAsync( + string engineId, + TranslationParallelCorpusConfig parallelCorpusConfig, + bool pretranslate + ) + { var parallelCorpus = await TranslationEnginesClient.AddParallelCorpusAsync(engineId, parallelCorpusConfig); if (pretranslate) { TranslationBuildConfig.Pretranslate!.Add( - new PretranslateCorpusConfig { ParallelCorpusId = parallelCorpus.Id, TextIds = filesToAdd.ToList() } + new PretranslateCorpusConfig { ParallelCorpusId = parallelCorpus.Id } ); } @@ -408,3 +418,5 @@ public ValueTask DisposeAsync() return new ValueTask(Task.CompletedTask); } } + +#pragma warning restore CS0612 // Type or member is obsolete diff --git a/src/Serval/test/Serval.Translation.Tests/Services/EngineServiceTests.cs b/src/Serval/test/Serval.Translation.Tests/Services/EngineServiceTests.cs index a71e8908..0da83cf1 100644 --- a/src/Serval/test/Serval.Translation.Tests/Services/EngineServiceTests.cs +++ b/src/Serval/test/Serval.Translation.Tests/Services/EngineServiceTests.cs @@ -392,6 +392,200 @@ await env.Service.StartBuildAsync( ); } + [Test] + public async Task StartBuildAsync_OneOfMultipleCorpora() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateMultipleCorporaEngineWithTextFilesAsync()).Id; + await env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = [new TrainingCorpus { CorpusRef = "corpus1" }], + Pretranslate = [new PretranslateCorpus { CorpusRef = "corpus1" }] + } + ); + _ = env.TranslationServiceClient.Received() + .StartBuildAsync( + 
new StartBuildRequest + { + BuildId = BUILD1_ID, + EngineId = engineId, + EngineType = "Smt", + Corpora = + { + new V1.ParallelCorpus + { + Id = "corpus1", + SourceCorpora = + { + new List + { + new() + { + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file1.txt", + Format = FileFormat.Text, + TextId = "text1" + } + }, + PretranslateAll = true, + TrainOnAll = true + } + } + }, + TargetCorpora = + { + new List + { + new() + { + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file2.txt", + Format = FileFormat.Text, + TextId = "text1" + } + }, + PretranslateAll = true, + TrainOnAll = true + } + } + } + } + } + } + ); + } + + [Test] + public async Task StartBuildAsync_TrainOnOnePretranslateTheOther() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateMultipleCorporaEngineWithTextFilesAsync()).Id; + await env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = [new TrainingCorpus { CorpusRef = "corpus1" }], + Pretranslate = [new PretranslateCorpus { CorpusRef = "corpus2" }] + } + ); + _ = env.TranslationServiceClient.Received() + .StartBuildAsync( + new StartBuildRequest + { + BuildId = BUILD1_ID, + EngineId = engineId, + EngineType = "Smt", + Corpora = + { + new V1.ParallelCorpus + { + Id = "corpus1", + SourceCorpora = + { + new List + { + new() + { + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file1.txt", + Format = FileFormat.Text, + TextId = "text1" + } + }, + PretranslateAll = false, + TrainOnAll = true + } + } + }, + TargetCorpora = + { + new List + { + new() + { + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file2.txt", + Format = FileFormat.Text, + TextId = "text1" + } + }, + PretranslateAll = false, + TrainOnAll = true + } + } + } + }, + new V1.ParallelCorpus + { + Id = "corpus2", + SourceCorpora = + { + new List + { + new() + { + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = 
"file3.txt", + Format = FileFormat.Text, + TextId = "text1" + } + }, + PretranslateAll = true, + TrainOnAll = false + } + } + }, + TargetCorpora = + { + new List + { + new() + { + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file4.txt", + Format = FileFormat.Text, + TextId = "text1" + } + }, + PretranslateAll = true, + TrainOnAll = false + } + } + } + } + } + } + ); + } + [Test] public async Task StartBuildAsync_TextFilesScriptureRangeSpecified() { @@ -529,21 +723,250 @@ await env.Service.StartBuildAsync( { new V1.ParallelCorpus { - Id = "corpus1", + Id = "corpus1", + SourceCorpora = + { + new List + { + new() + { + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file1.zip", + Format = FileFormat.Paratext, + TextId = "file1.zip" + } + }, + PretranslateAll = true, + TrainOnAll = false + } + } + }, + TargetCorpora = + { + new List + { + new() + { + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file2.zip", + Format = FileFormat.Paratext, + TextId = "file2.zip" + } + }, + PretranslateAll = true, + TrainOnAll = false + } + } + } + } + } + } + ); + } + + [Test] + public async Task StartBuildAsync_ParallelCorpus_TextFiles() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateParallelCorpusEngineWithTextFilesAsync()).Id; + await env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = + [ + new TrainingCorpus + { + ParallelCorpusRef = "parallel-corpus1", + SourceFilters = new List() + { + new() + { + CorpusRef = "parallel-corpus1-source1", + TextIds = new List { "MAT" } + } + }, + TargetFilters = new List() + { + new() + { + CorpusRef = "parallel-corpus1-target1", + TextIds = new List { "MAT" } + } + } + } + ] + } + ); + _ = env.TranslationServiceClient.Received() + .StartBuildAsync( + new StartBuildRequest + { + BuildId = BUILD1_ID, + EngineId = engineId, + EngineType = "Smt", + Corpora = + { + new V1.ParallelCorpus + { + Id = 
"parallel-corpus1", + SourceCorpora = + { + new List + { + new() + { + Id = "parallel-corpus1-source1", + Language = "es", + TrainOnTextIds = { "MAT" }, + Files = + { + new V1.CorpusFile + { + Location = "file1.txt", + Format = FileFormat.Text, + TextId = "MAT" + } + }, + PretranslateAll = true, + TrainOnAll = false + }, + new() + { + Id = "parallel-corpus1-source2", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file3.txt", + Format = FileFormat.Text, + TextId = "MRK" + } + }, + PretranslateAll = true, + TrainOnAll = false + } + } + }, + TargetCorpora = + { + new List + { + new() + { + Id = "parallel-corpus1-target1", + Language = "en", + TrainOnTextIds = { "MAT" }, + Files = + { + new V1.CorpusFile + { + Location = "file2.txt", + Format = FileFormat.Text, + TextId = "MAT" + } + }, + PretranslateAll = true, + TrainOnAll = false + }, + new() + { + Id = "parallel-corpus1-target2", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file4.txt", + Format = FileFormat.Text, + TextId = "MRK" + } + }, + PretranslateAll = true, + TrainOnAll = false + } + } + } + } + } + } + ); + } + + [Test] + public async Task StartBuildAsync_ParallelCorpus_OneOfMultipleCorpora() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateMultipleParallelCorpusEngineWithTextFilesAsync()).Id; + await env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = + [ + new TrainingCorpus + { + ParallelCorpusRef = "parallel-corpus1", + SourceFilters = new List() + { + new() + { + CorpusRef = "parallel-corpus1-source1", + TextIds = new List { "MAT" } + } + }, + TargetFilters = new List() + { + new() + { + CorpusRef = "parallel-corpus1-target1", + TextIds = new List { "MAT" } + } + } + } + ], + Pretranslate = [new PretranslateCorpus { ParallelCorpusRef = "parallel-corpus1" }] + } + ); + _ = env.TranslationServiceClient.Received() + .StartBuildAsync( + new StartBuildRequest + { + BuildId = BUILD1_ID, 
+ EngineId = engineId, + EngineType = "Smt", + Corpora = + { + new V1.ParallelCorpus + { + Id = "parallel-corpus1", SourceCorpora = { new List { new() { + Id = "parallel-corpus1-source1", Language = "es", + TrainOnTextIds = { "MAT" }, Files = { new V1.CorpusFile { - Location = "file1.zip", - Format = FileFormat.Paratext, - TextId = "file1.zip" + Location = "file1.txt", + Format = FileFormat.Text, + TextId = "MAT" } }, PretranslateAll = true, @@ -557,14 +980,16 @@ await env.Service.StartBuildAsync( { new() { + Id = "parallel-corpus1-target1", Language = "en", + TrainOnTextIds = { "MAT" }, Files = { new V1.CorpusFile { - Location = "file2.zip", - Format = FileFormat.Paratext, - TextId = "file2.zip" + Location = "file2.txt", + Format = FileFormat.Text, + TextId = "MAT" } }, PretranslateAll = true, @@ -579,10 +1004,10 @@ await env.Service.StartBuildAsync( } [Test] - public async Task StartBuildAsync_ParallelCorpus_TextFiles() + public async Task StartBuildAsync_ParallelCorpus_TrainOnOnePretranslateTheOther() { var env = new TestEnvironment(); - string engineId = (await env.CreateParallelCorpusEngineWithTextFilesAsync()).Id; + string engineId = (await env.CreateMultipleParallelCorpusEngineWithTextFilesAsync()).Id; await env.Service.StartBuildAsync( new Build { @@ -610,7 +1035,8 @@ await env.Service.StartBuildAsync( } } } - ] + ], + Pretranslate = [new PretranslateCorpus { ParallelCorpusRef = "parallel-corpus2" }] } ); _ = env.TranslationServiceClient.Received() @@ -643,51 +1069,67 @@ await env.Service.StartBuildAsync( TextId = "MAT" } }, - PretranslateAll = true, + PretranslateAll = false, TrainOnAll = false - }, + } + } + }, + TargetCorpora = + { + new List + { new() { - Id = "parallel-corpus1-source2", - Language = "es", + Id = "parallel-corpus1-target1", + Language = "en", + TrainOnTextIds = { "MAT" }, Files = { new V1.CorpusFile { - Location = "file3.txt", + Location = "file2.txt", Format = FileFormat.Text, - TextId = "MRK" + TextId = "MAT" } }, - PretranslateAll = 
true, - TrainOnAll = true + PretranslateAll = false, + TrainOnAll = false } } - }, - TargetCorpora = + } + }, + new V1.ParallelCorpus + { + Id = "parallel-corpus2", + SourceCorpora = { new List { new() { - Id = "parallel-corpus1-target1", - Language = "en", - TrainOnTextIds = { "MAT" }, + Id = "parallel-corpus2-source1", + Language = "es", Files = { new V1.CorpusFile { - Location = "file2.txt", + Location = "file3.txt", Format = FileFormat.Text, - TextId = "MAT" + TextId = "MRK" } }, PretranslateAll = true, TrainOnAll = false - }, + } + } + }, + TargetCorpora = + { + new List + { new() { - Id = "parallel-corpus1-target2", + Id = "parallel-corpus2-target1", Language = "en", Files = { @@ -699,7 +1141,7 @@ await env.Service.StartBuildAsync( } }, PretranslateAll = true, - TrainOnAll = true + TrainOnAll = false } } } @@ -791,7 +1233,7 @@ await env.Service.StartBuildAsync( } }, PretranslateAll = true, - TrainOnAll = true + TrainOnAll = false } } }, @@ -830,7 +1272,7 @@ await env.Service.StartBuildAsync( } }, PretranslateAll = true, - TrainOnAll = true + TrainOnAll = false } } } @@ -864,6 +1306,17 @@ await env.Service.StartBuildAsync( new() { CorpusRef = "parallel-corpus1-target1", ScriptureRange = "MAT 1;MRK" } } } + ], + Pretranslate = + [ + new PretranslateCorpus + { + ParallelCorpusRef = "parallel-corpus1", + SourceFilters = new List() + { + new() { CorpusRef = "parallel-corpus1-source1", ScriptureRange = "MAT 2" } + } + } ] } ); @@ -898,6 +1351,13 @@ await env.Service.StartBuildAsync( new ScriptureChapters { Chapters = { } } } }, + PretranslateChapters = + { + { + "MAT", + new ScriptureChapters { Chapters = { 2 } } + } + }, Files = { new V1.CorpusFile @@ -907,7 +1367,7 @@ await env.Service.StartBuildAsync( TextId = "file1.zip" } }, - PretranslateAll = true, + PretranslateAll = false, TrainOnAll = false }, new() @@ -923,8 +1383,8 @@ await env.Service.StartBuildAsync( TextId = "file3.zip" } }, - PretranslateAll = true, - TrainOnAll = true + PretranslateAll = false, + 
TrainOnAll = false } } }, @@ -973,7 +1433,7 @@ await env.Service.StartBuildAsync( } }, PretranslateAll = true, - TrainOnAll = true + TrainOnAll = false } } } @@ -1357,7 +1817,7 @@ await env.Service.StartBuildAsync( SourceFilters = new List() { new() { CorpusRef = "parallel-corpus1-source1", ScriptureRange = "MAT 1;MRK" } - } + }, } ] } @@ -1417,7 +1877,7 @@ await env.Service.StartBuildAsync( } }, PretranslateAll = true, - TrainOnAll = true + TrainOnAll = false } }, TargetCorpora = @@ -1706,6 +2166,75 @@ public async Task CreateEngineWithTextFilesAsync() return engine; } + public async Task CreateMultipleCorporaEngineWithTextFilesAsync() + { + var engine = new Engine + { + Id = "engine1", + Owner = "owner1", + SourceLanguage = "es", + TargetLanguage = "en", + Type = "Smt", + Corpora = new Models.Corpus[] + { + new() + { + Id = "corpus1", + SourceLanguage = "es", + TargetLanguage = "en", + SourceFiles = + [ + new() + { + Id = "file1", + Filename = "file1.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "text1" + } + ], + TargetFiles = + [ + new() + { + Id = "file2", + Filename = "file2.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "text1" + } + ], + }, + new() + { + Id = "corpus2", + SourceLanguage = "es", + TargetLanguage = "en", + SourceFiles = + [ + new() + { + Id = "file3", + Filename = "file3.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "text1" + } + ], + TargetFiles = + [ + new() + { + Id = "file4", + Filename = "file4.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "text1" + } + ], + } + } + }; + await Engines.InsertAsync(engine); + return engine; + } + public async Task CreateEngineWithParatextProjectAsync() { var engine = new Engine @@ -1840,6 +2369,107 @@ public async Task CreateParallelCorpusEngineWithTextFilesAsync() return engine; } + public async Task CreateMultipleParallelCorpusEngineWithTextFilesAsync() + { + var engine = new Engine + { + Id = "engine1", + Owner = "owner1", + SourceLanguage = 
"es", + TargetLanguage = "en", + Type = "Smt", + ParallelCorpora = new Models.ParallelCorpus[] + { + new() + { + Id = "parallel-corpus1", + SourceCorpora = new List() + { + new() + { + Id = "parallel-corpus1-source1", + Name = "", + Language = "es", + Files = + [ + new() + { + Id = "file1", + Filename = "file1.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "MAT" + } + ] + } + }, + TargetCorpora = new List() + { + new() + { + Id = "parallel-corpus1-target1", + Name = "", + Language = "en", + Files = + [ + new() + { + Id = "file2", + Filename = "file2.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "MAT" + } + ] + } + } + }, + new() + { + Id = "parallel-corpus2", + SourceCorpora = new List() + { + new() + { + Id = "parallel-corpus2-source1", + Name = "", + Language = "es", + Files = + [ + new() + { + Id = "file3", + Filename = "file3.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "MRK" + } + ] + } + }, + TargetCorpora = new List() + { + new() + { + Id = "parallel-corpus2-target1", + Name = "", + Language = "en", + Files = + [ + new() + { + Id = "file4", + Filename = "file4.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "MRK" + } + ] + } + } + } + } + }; + await Engines.InsertAsync(engine); + return engine; + } + public async Task CreateParallelCorpusEngineWithParatextProjectAsync() { var engine = new Engine diff --git a/src/Serval/test/Serval.Translation.Tests/Services/PretranslationServiceTests.cs b/src/Serval/test/Serval.Translation.Tests/Services/PretranslationServiceTests.cs index cbdcb6ff..5aca4ed6 100644 --- a/src/Serval/test/Serval.Translation.Tests/Services/PretranslationServiceTests.cs +++ b/src/Serval/test/Serval.Translation.Tests/Services/PretranslationServiceTests.cs @@ -22,7 +22,7 @@ public class PretranslationServiceTests [Test] public async Task GetUsfmAsync_Source_PreferExisting() { - TestEnvironment env = new(); + using TestEnvironment env = new(); string usfm = await env.GetUsfmAsync( 
PretranslationUsfmTextOrigin.PreferExisting, @@ -46,7 +46,7 @@ public async Task GetUsfmAsync_Source_PreferExisting() [Test] public async Task GetUsfmAsync_Source_PreferPretranslated() { - TestEnvironment env = new(); + using TestEnvironment env = new(); string usfm = await env.GetUsfmAsync( PretranslationUsfmTextOrigin.PreferPretranslated, @@ -70,7 +70,7 @@ public async Task GetUsfmAsync_Source_PreferPretranslated() [Test] public async Task GetUsfmAsync_Source_OnlyExisting() { - TestEnvironment env = new(); + using TestEnvironment env = new(); string usfm = await env.GetUsfmAsync( PretranslationUsfmTextOrigin.OnlyExisting, @@ -94,7 +94,7 @@ public async Task GetUsfmAsync_Source_OnlyExisting() [Test] public async Task GetUsfmAsync_Source_OnlyPretranslated() { - TestEnvironment env = new(); + using TestEnvironment env = new(); string usfm = await env.GetUsfmAsync( PretranslationUsfmTextOrigin.OnlyPretranslated, @@ -118,7 +118,7 @@ public async Task GetUsfmAsync_Source_OnlyPretranslated() [Test] public async Task GetUsfmAsync_Target_PreferExisting() { - TestEnvironment env = new(); + using TestEnvironment env = new(); env.AddMatthewToTarget(); string usfm = await env.GetUsfmAsync( @@ -143,7 +143,7 @@ public async Task GetUsfmAsync_Target_PreferExisting() [Test] public async Task GetUsfmAsync_Target_PreferPretranslated() { - TestEnvironment env = new(); + using TestEnvironment env = new(); env.AddMatthewToTarget(); string usfm = await env.GetUsfmAsync( @@ -168,7 +168,7 @@ public async Task GetUsfmAsync_Target_PreferPretranslated() [Test] public async Task GetUsfmAsync_Target_TargetBookDoesNotExist() { - TestEnvironment env = new(); + using TestEnvironment env = new(); string usfm = await env.GetUsfmAsync( PretranslationUsfmTextOrigin.PreferPretranslated, @@ -181,7 +181,7 @@ public async Task GetUsfmAsync_Target_TargetBookDoesNotExist() [Test] public async Task GetUsfmAsync_Auto_TargetBookDoesNotExist() { - TestEnvironment env = new(); + using TestEnvironment env = 
new(); string usfm = await env.GetUsfmAsync( PretranslationUsfmTextOrigin.PreferPretranslated, @@ -205,7 +205,7 @@ public async Task GetUsfmAsync_Auto_TargetBookDoesNotExist() [Test] public async Task GetUsfmAsync_Auto_TargetBookExists() { - TestEnvironment env = new(); + using TestEnvironment env = new(); env.AddMatthewToTarget(); string usfm = await env.GetUsfmAsync( @@ -230,7 +230,7 @@ public async Task GetUsfmAsync_Auto_TargetBookExists() [Test] public async Task GetUsfmAsync_Target_OnlyExisting() { - TestEnvironment env = new(); + using TestEnvironment env = new(); env.AddMatthewToTarget(); string usfm = await env.GetUsfmAsync( @@ -244,7 +244,7 @@ public async Task GetUsfmAsync_Target_OnlyExisting() [Test] public async Task GetUsfmAsync_Target_OnlyPretranslated() { - TestEnvironment env = new(); + using TestEnvironment env = new(); env.AddMatthewToTarget(); string usfm = await env.GetUsfmAsync( @@ -266,10 +266,26 @@ public async Task GetUsfmAsync_Target_OnlyPretranslated() ); } - private class TestEnvironment + private class TestEnvironment : IDisposable { public TestEnvironment() { + CorpusFile file1 = + new() + { + Id = "file1", + Filename = "file1.zip", + Format = Shared.Contracts.FileFormat.Paratext, + TextId = "project1" + }; + CorpusFile file2 = + new() + { + Id = "file2", + Filename = "file2.zip", + Format = Shared.Contracts.FileFormat.Paratext, + TextId = "project1" + }; Engines = new MemoryRepository( [ new() @@ -287,29 +303,45 @@ public TestEnvironment() Id = "corpus1", SourceLanguage = "en", TargetLanguage = "en", - SourceFiles = - [ + SourceFiles = [file1], + TargetFiles = [file2], + } + ] + }, + new() + { + Id = "parallel_engine1", + Owner = "owner1", + SourceLanguage = "en", + TargetLanguage = "en", + Type = "nmt", + ModelRevision = 1, + ParallelCorpora = + [ + new() + { + Id = "parallel_corpus1", + SourceCorpora = new List() + { new() { - Id = "file1", - Filename = "file1.zip", - Format = Shared.Contracts.FileFormat.Paratext, - TextId = 
"project1" + Id = "src_1", + Language = "en", + Files = [file1], } - ], - TargetFiles = - [ + }, + TargetCorpora = new List() + { new() { - Id = "file2", - Filename = "file2.zip", - Format = Shared.Contracts.FileFormat.Paratext, - TextId = "project1" + Id = "trg_1", + Language = "es", + Files = [file2], } - ], + } } ] - } + }, ] ); @@ -334,6 +366,26 @@ public TestEnvironment() TextId = "MAT", Refs = ["MAT 1:2"], Translation = "Chapter 1, verse 2." + }, + new() + { + Id = "pt3", + EngineRef = "parallel_engine1", + ModelRevision = 1, + CorpusRef = "parallel_corpus1", + TextId = "MAT", + Refs = ["MAT 1:1"], + Translation = "Chapter 1, verse 1." + }, + new() + { + Id = "pt4", + EngineRef = "parallel_engine1", + ModelRevision = 1, + CorpusRef = "parallel_corpus1", + TextId = "MAT", + Refs = ["MAT 1:2"], + Translation = "Chapter 1, verse 2." } ] ); @@ -342,23 +394,37 @@ public TestEnvironment() ScriptureDataFileService.GetParatextProjectSettings("file2.zip").Returns(CreateProjectSettings("TRG")); var zipSubstituteSource = Substitute.For(); var zipSubstituteTarget = Substitute.For(); - zipSubstituteSource.OpenEntry("MATSRC.SFM").Returns(new MemoryStream(Encoding.UTF8.GetBytes(SourceUsfm))); - zipSubstituteTarget.OpenEntry("MATTRG.SFM").Returns(new MemoryStream(Encoding.UTF8.GetBytes(""))); + zipSubstituteSource + .OpenEntry("MATSRC.SFM") + .Returns(x => new MemoryStream(Encoding.UTF8.GetBytes(SourceUsfm))); + zipSubstituteTarget.OpenEntry("MATTRG.SFM").Returns(x => new MemoryStream(Encoding.UTF8.GetBytes(""))); zipSubstituteSource.EntryExists(Arg.Any()).Returns(false); zipSubstituteTarget.EntryExists(Arg.Any()).Returns(false); zipSubstituteSource.EntryExists("MATSRC.SFM").Returns(true); zipSubstituteTarget.EntryExists("MATTRG.SFM").Returns(true); TargetZipContainer = zipSubstituteTarget; - using var textUpdaterSource = new Shared.Services.ZipParatextProjectTextUpdater( - zipSubstituteSource, - CreateProjectSettings("SRC") - ); - using var textUpdaterTarget = new 
Shared.Services.ZipParatextProjectTextUpdater( - zipSubstituteTarget, - CreateProjectSettings("TRG") - ); - ScriptureDataFileService.GetZipParatextProjectTextUpdater("file1.zip").Returns(textUpdaterSource); - ScriptureDataFileService.GetZipParatextProjectTextUpdater("file2.zip").Returns(textUpdaterTarget); + TextUpdaters = new List(); + Shared.Services.ZipParatextProjectTextUpdater GetTextUpdater(string type) + { + var updater = type switch + { + "SRC" + => new Shared.Services.ZipParatextProjectTextUpdater( + zipSubstituteSource, + CreateProjectSettings("SRC") + ), + "TRG" + => new Shared.Services.ZipParatextProjectTextUpdater( + zipSubstituteTarget, + CreateProjectSettings("TRG") + ), + _ => throw new ArgumentException() + }; + TextUpdaters.Add(updater); + return updater; + } + ScriptureDataFileService.GetZipParatextProjectTextUpdater("file1.zip").Returns(x => GetTextUpdater("SRC")); + ScriptureDataFileService.GetZipParatextProjectTextUpdater("file2.zip").Returns(x => GetTextUpdater("TRG")); Service = new PretranslationService(Pretranslations, Engines, ScriptureDataFileService); } @@ -367,6 +433,7 @@ public TestEnvironment() public MemoryRepository Engines { get; } public IScriptureDataFileService ScriptureDataFileService { get; } public IZipContainer TargetZipContainer { get; } + public IList TextUpdaters { get; } public async Task GetUsfmAsync( PretranslationUsfmTextOrigin textOrigin, @@ -381,12 +448,25 @@ PretranslationUsfmTemplate template textOrigin: textOrigin, template: template ); - return usfm.Replace("\r\n", "\n"); + usfm = usfm.Replace("\r\n", "\n"); + string parallel_usfm = await Service.GetUsfmAsync( + engineId: "parallel_engine1", + modelRevision: 1, + corpusId: "parallel_corpus1", + textId: "MAT", + textOrigin: textOrigin, + template: template + ); + parallel_usfm = parallel_usfm.Replace("\r\n", "\n"); + Assert.That(parallel_usfm, Is.EqualTo(usfm)); + return usfm; } public void AddMatthewToTarget() { - 
TargetZipContainer.OpenEntry("MATTRG.SFM").Returns(new MemoryStream(Encoding.UTF8.GetBytes(TargetUsfm))); + TargetZipContainer + .OpenEntry("MATTRG.SFM") + .Returns(x => new MemoryStream(Encoding.UTF8.GetBytes(TargetUsfm))); } private static ParatextProjectSettings CreateProjectSettings(string name) @@ -406,5 +486,13 @@ private static ParatextProjectSettings CreateProjectSettings(string name) languageCode: "en" ); } + + public void Dispose() + { + foreach (var updater in TextUpdaters) + { + updater.Dispose(); + } + } } }