Skip to content

Commit

Permalink
batch import entries as a single commit. Import Semantic domains, Par…
Browse files Browse the repository at this point in the history
…ts of Speech.
  • Loading branch information
hahn-kev committed Jul 11, 2024
1 parent ca160ca commit 4c77c30
Show file tree
Hide file tree
Showing 8 changed files with 108 additions and 19 deletions.
13 changes: 12 additions & 1 deletion backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ internal int GetWritingSystemHandle(WritingSystemId ws, WritingSystemType? type
return lcmWs.Handle;
}


internal CoreWritingSystemDefinition? GetLcmWritingSystem(WritingSystemId ws, WritingSystemType? type = null)
{
if (ws == "default")
Expand Down Expand Up @@ -172,6 +173,11 @@ public async IAsyncEnumerable<PartOfSpeech> GetPartsOfSpeech()
}
}

public async Task CreatePartOfSpeech(PartOfSpeech partOfSpeech)

Check warning on line 176 in backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs

View workflow job for this annotation

GitHub Actions / Build FW Lite

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.

Check warning on line 176 in backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs

View workflow job for this annotation

GitHub Actions / Build FW Lite

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.
{
throw new NotImplementedException();
}

public async IAsyncEnumerable<SemanticDomain> GetSemanticDomains()

Check warning on line 181 in backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs

View workflow job for this annotation

GitHub Actions / Build FW Lite

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.

Check warning on line 181 in backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs

View workflow job for this annotation

GitHub Actions / Build FW Lite

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.
{
foreach (var semanticDomain in _semanticDomainRepository.AllInstances().OrderBy(p => p.Name.BestAnalysisAlternative.Text))
Expand All @@ -180,11 +186,16 @@ public async IAsyncEnumerable<SemanticDomain> GetSemanticDomains()
{
Id = semanticDomain.Guid,
Name = FromLcmMultiString(semanticDomain.Name),
Code = semanticDomain.OcmCodes
Code = semanticDomain.OcmCodes ?? ""
};
}
}

public async Task CreateSemanticDomain(SemanticDomain semanticDomain)

Check warning on line 194 in backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs

View workflow job for this annotation

GitHub Actions / Build FW Lite

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.

Check warning on line 194 in backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs

View workflow job for this annotation

GitHub Actions / Build FW Lite

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.
{
throw new NotImplementedException();
}

internal ICmSemanticDomain GetLcmSemanticDomain(Guid semanticDomainId)
{
return _semanticDomainRepository.GetObject(semanticDomainId);
Expand Down
40 changes: 39 additions & 1 deletion backend/FwLite/LcmCrdt/CrdtLexboxApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,21 @@ public IAsyncEnumerable<PartOfSpeech> GetPartsOfSpeech()
return PartsOfSpeech.AsAsyncEnumerable();
}

/// <summary>
/// Records the given part of speech in the CRDT data model by adding a single
/// <c>CreatePartOfSpeechChange</c> built from its id and name.
/// </summary>
/// <param name="partOfSpeech">The part of speech whose <c>Id</c> and <c>Name</c> are stored.</param>
public async Task CreatePartOfSpeech(PartOfSpeech partOfSpeech)
{
    // The third constructor argument is always passed as false here; its meaning is
    // defined by CreatePartOfSpeechChange, which is not visible in this file view.
    var createChange = new CreatePartOfSpeechChange(partOfSpeech.Id, partOfSpeech.Name, false);
    await dataModel.AddChange(ClientId, createChange);
}

/// <summary>
/// Exposes the <c>SemanticDomains</c> query as an asynchronous stream.
/// </summary>
/// <returns>The semantic domains, enumerated asynchronously.</returns>
public IAsyncEnumerable<MiniLcm.SemanticDomain> GetSemanticDomains() =>
    SemanticDomains.AsAsyncEnumerable();

/// <summary>
/// Records the given semantic domain in the CRDT data model by adding a single
/// <c>CreateSemanticDomainChange</c> built from its id, name and code.
/// </summary>
/// <param name="semanticDomain">The semantic domain whose <c>Id</c>, <c>Name</c> and <c>Code</c> are stored.</param>
public async Task CreateSemanticDomain(MiniLcm.SemanticDomain semanticDomain)
{
    var createChange = new CreateSemanticDomainChange(semanticDomain.Id, semanticDomain.Name, semanticDomain.Code);
    await dataModel.AddChange(ClientId, createChange);
}

public IAsyncEnumerable<MiniLcm.Entry> GetEntries(QueryOptions? options = null)
{
return GetEntriesAsyncEnum(predicate: null, options);
Expand Down Expand Up @@ -188,6 +198,35 @@ await dataModel.AddChanges(ClientId,
], deferCommit: true);
}

/// <summary>
/// Creates every entry from <paramref name="entries"/> (together with its senses and
/// example sentences) by handing all resulting changes to a single
/// <c>dataModel.AddChanges</c> call.
/// </summary>
/// <param name="entries">Asynchronous stream of entries to create.</param>
/// <remarks>
/// Semantic domains and parts of speech are loaded once up front and reused for every
/// entry, so senses are resolved against in-memory dictionaries rather than per-sense queries.
/// </remarks>
public async Task BulkCreateEntries(IAsyncEnumerable<MiniLcm.Entry> entries)
{
    var semanticDomains = await SemanticDomains.ToDictionaryAsync(sd => sd.Id, sd => sd);
    var partsOfSpeech = await PartsOfSpeech.ToDictionaryAsync(p => p.Id, p => p);

    // Drain the async stream with await foreach rather than ToBlockingEnumerable():
    // the latter blocks the calling thread on every element (sync-over-async), which
    // ties up a thread-pool thread for the whole import and can deadlock under a
    // synchronization context.
    var changes = new List<IChange>();
    await foreach (var entry in entries)
    {
        changes.AddRange(CreateEntryChanges(entry, semanticDomains, partsOfSpeech));
    }

    // Still one AddChanges call, so the whole batch is submitted together.
    await dataModel.AddChanges(ClientId, changes);
}

/// <summary>
/// Lazily yields the changes that create one entry: a <c>CreateEntryChange</c>, then for
/// each sense a <c>CreateSenseChange</c> followed by one
/// <c>CreateExampleSentenceChange</c> per example sentence.
/// </summary>
/// <param name="entry">The entry to create; its senses are modified in place (see remarks).</param>
/// <param name="semanticDomains">Known semantic domains keyed by id.</param>
/// <param name="partsOfSpeech">Known parts of speech keyed by id.</param>
/// <remarks>
/// Before a sense is emitted, its semantic domains are replaced by the matching instances
/// from <paramref name="semanticDomains"/> (unknown ids are dropped), and when its part of
/// speech id is found in <paramref name="partsOfSpeech"/> the sense's id and "en" name are
/// taken from the stored part of speech.
/// </remarks>
private IEnumerable<IChange> CreateEntryChanges(MiniLcm.Entry entry, Dictionary<Guid, SemanticDomain> semanticDomains, Dictionary<Guid, Objects.PartOfSpeech> partsOfSpeech)
{
    yield return new CreateEntryChange(entry);

    foreach (var sense in entry.Senses)
    {
        // Keep only the domains present in the lookup, swapping each for the stored instance.
        var resolvedDomains = new List<MiniLcm.SemanticDomain>();
        foreach (var requested in sense.SemanticDomains)
        {
            if (semanticDomains.TryGetValue(requested.Id, out var stored) && stored is MiniLcm.SemanticDomain known)
            {
                resolvedDomains.Add(known);
            }
        }
        sense.SemanticDomains = resolvedDomains;

        // A part of speech id that is not in the lookup is left untouched on the sense.
        if (sense.PartOfSpeechId is { } partOfSpeechId && partsOfSpeech.TryGetValue(partOfSpeechId, out var storedPartOfSpeech))
        {
            sense.PartOfSpeechId = storedPartOfSpeech.Id;
            sense.PartOfSpeech = storedPartOfSpeech.Name["en"] ?? string.Empty;
        }

        yield return new CreateSenseChange(sense, entry.Id);

        foreach (var sentence in sense.ExampleSentences)
        {
            yield return new CreateExampleSentenceChange(sentence, sense.Id);
        }
    }
}

public async Task<MiniLcm.Entry> CreateEntry(MiniLcm.Entry entry)
{
await dataModel.AddChanges(ClientId,
Expand Down Expand Up @@ -226,7 +265,6 @@ private async IAsyncEnumerable<IChange> CreateSenseChanges(Guid entryId, MiniLcm
sense.PartOfSpeech = partOfSpeech?.Name["en"] ?? string.Empty;
}


yield return new CreateSenseChange(sense, entryId);
foreach (var change in sense.ExampleSentences.Select(sentence =>
new CreateExampleSentenceChange(sentence, sense.Id)))
Expand Down
10 changes: 10 additions & 0 deletions backend/FwLite/LocalWebApp/Hubs/CrdtMiniLcmApiHub.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,16 @@ public async Task<WritingSystem> UpdateWritingSystem(WritingSystemId id, Writing
return writingSystem;
}

/// <summary>
/// Hub method that forwards directly to <c>lexboxApi.GetPartsOfSpeech()</c>.
/// </summary>
/// <returns>The asynchronous stream returned by the underlying API.</returns>
public IAsyncEnumerable<PartOfSpeech> GetPartsOfSpeech() => lexboxApi.GetPartsOfSpeech();

/// <summary>
/// Hub method that forwards directly to <c>lexboxApi.GetSemanticDomains()</c>.
/// </summary>
/// <returns>The asynchronous stream returned by the underlying API.</returns>
public IAsyncEnumerable<SemanticDomain> GetSemanticDomains() => lexboxApi.GetSemanticDomains();

public IAsyncEnumerable<Entry> GetEntriesForExemplar(string exemplar, QueryOptions? options = null)
{
throw new NotImplementedException();
Expand Down
1 change: 1 addition & 0 deletions backend/FwLite/LocalWebApp/LocalWebApp.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Humanizer.Core" Version="2.14.1" />
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="8.0.4"/>
<PackageReference Include="Microsoft.Extensions.FileProviders.Embedded" Version="8.0.4" />
<PackageReference Include="Microsoft.Identity.Client.Extensions.Msal" Version="4.61.0" />
Expand Down
42 changes: 28 additions & 14 deletions backend/FwLite/LocalWebApp/Services/ImportFwdataService.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using FwDataMiniLcmBridge;
using FwDataMiniLcmBridge.Api;
using FwDataMiniLcmBridge.LcmUtils;
using System.Diagnostics;
using FwDataMiniLcmBridge;
using Humanizer;
using LcmCrdt;
using MiniLcm;

Expand All @@ -10,6 +10,7 @@ public class ImportFwdataService(ProjectsService projectsService, ILogger<Import
{
public async Task<CrdtProject> Import(string projectName)
{
var startTime = Stopwatch.GetTimestamp();
var fwDataProject = FieldWorksProjectList.GetProject(projectName);
if (fwDataProject is null)
{
Expand All @@ -22,11 +23,12 @@ public async Task<CrdtProject> Import(string projectName)
var crdtApi = provider.GetRequiredService<ILexboxApi>();
await ImportProject(crdtApi, fwDataApi, fwDataApi.EntryCount);
});
logger.LogInformation("Import of {ProjectName} complete!", fwDataApi.Project.Name);
var timeSpent = Stopwatch.GetElapsedTime(startTime);
logger.LogInformation("Import of {ProjectName} complete, took {TimeSpend}", fwDataApi.Project.Name, timeSpent.Humanize());
return project;
}

async Task ImportProject(ILexboxApi importTo, ILexboxApi importFrom, int entryCount)
private async Task ImportProject(ILexboxApi importTo, ILexboxApi importFrom, int entryCount)
{
var writingSystems = await importFrom.GetWritingSystems();
foreach (var ws in writingSystems.Analysis)
Expand All @@ -41,19 +43,31 @@ async Task ImportProject(ILexboxApi importTo, ILexboxApi importFrom, int entryCo
logger.LogInformation("Imported ws {WsId}", ws.Id);
}

var index = 0;
await foreach (var entry in importFrom.GetEntries(new QueryOptions(Count: 100_000, Offset: 0)))
await foreach (var semanticDomain in importFrom.GetSemanticDomains())
{
if (importTo is CrdtLexboxApi crdtLexboxApi)
{
await crdtLexboxApi.CreateEntryLite(entry);
}
else
await importTo.CreateSemanticDomain(semanticDomain);
logger.LogTrace("Imported semantic domain {Id}", semanticDomain.Id);
}
await foreach (var partOfSpeech in importFrom.GetPartsOfSpeech())
{
await importTo.CreatePartOfSpeech(partOfSpeech);
logger.LogInformation("Imported part of speech {Id}", partOfSpeech.Id);
}

var entries = importFrom.GetEntries(new QueryOptions(Count: 100_000, Offset: 0));
if (importTo is CrdtLexboxApi crdtLexboxApi)
{
await crdtLexboxApi.BulkCreateEntries(entries);
}
else
{
var index = 0;
await foreach (var entry in entries)
{
await importTo.CreateEntry(entry);
logger.LogTrace("Imported entry, {Index} of {Count} {Id}", index++, entryCount, entry.Id);
}

logger.LogInformation("Imported entry, {Index} of {Count} {Id}", index++, entryCount, entry.Id);
}
logger.LogInformation("Imported {Count} entries", entryCount);
}
}
5 changes: 5 additions & 0 deletions backend/FwLite/MiniLcm/ILexboxApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,15 @@ IAsyncEnumerable<PartOfSpeech> GetPartsOfSpeech()
{
throw new NotImplementedException();
}
/// <summary>
/// Adds the given part of speech to the project behind this API.
/// </summary>
/// <param name="partOfSpeech">The part of speech to create.</param>
/// <remarks>
/// This member has no default implementation, so every implementer must provide one.
/// </remarks>
Task CreatePartOfSpeech(PartOfSpeech partOfSpeech);
/// <summary>
/// Returns the project's semantic domains as an asynchronous stream.
/// </summary>
/// <exception cref="NotImplementedException">
/// Always thrown by this default implementation; implementers override it to supply data.
/// </exception>
IAsyncEnumerable<SemanticDomain> GetSemanticDomains() => throw new NotImplementedException();

/// <summary>
/// Adds the given semantic domain to the project behind this API.
/// </summary>
/// <param name="semanticDomain">The semantic domain to create.</param>
/// <remarks>
/// This member has no default implementation, so every implementer must provide one.
/// </remarks>
Task CreateSemanticDomain(SemanticDomain semanticDomain);


IAsyncEnumerable<Entry> GetEntries(QueryOptions? options = null);
IAsyncEnumerable<Entry> SearchEntries(string query, QueryOptions? options = null);
Task<Entry?> GetEntry(Guid id);
Expand Down
9 changes: 9 additions & 0 deletions backend/FwLite/MiniLcm/InMemoryApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,15 @@ public Task<Sense> CreateSense(Guid entryId, Sense sense)
return Task.FromResult(sense);
}

public async Task CreatePartOfSpeech(PartOfSpeech partOfSpeech)

Check warning on line 167 in backend/FwLite/MiniLcm/InMemoryApi.cs

View workflow job for this annotation

GitHub Actions / Build FW Lite

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.

Check warning on line 167 in backend/FwLite/MiniLcm/InMemoryApi.cs

View workflow job for this annotation

GitHub Actions / Build FW Lite

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.
{
throw new NotImplementedException();
}

public async Task CreateSemanticDomain(SemanticDomain semanticDomain)

Check warning on line 172 in backend/FwLite/MiniLcm/InMemoryApi.cs

View workflow job for this annotation

GitHub Actions / Build FW Lite

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.

Check warning on line 172 in backend/FwLite/MiniLcm/InMemoryApi.cs

View workflow job for this annotation

GitHub Actions / Build FW Lite

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.
{
throw new NotImplementedException();
}

public Task DeleteEntry(Guid id)
{
Expand Down
7 changes: 4 additions & 3 deletions frontend/viewer/src/HomeView.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,16 @@
projectsPromise = fetchProjects();
}
let loading = '';
let importing = '';
async function importFwDataProject(name: string) {
loading = name;
importing = name;
await fetch(`/api/import/fwdata/${name}`, {
method: 'POST',
});
projectsPromise = fetchProjects();
await projectsPromise;
loading = '';
importing = '';
}
let downloading = '';
Expand Down Expand Up @@ -194,6 +194,7 @@
{:else if rowData.fwdata}
<Button
size="md"
loading={importing === rowData.name}
icon={mdiBookArrowLeftOutline}
on:click={() => importFwDataProject(rowData.name)}
>
Expand Down

0 comments on commit 4c77c30

Please sign in to comment.