Skip to content

Commit

Permalink
Sync corpora and data files in translation engine
Browse files Browse the repository at this point in the history
Use MongoDB JSON strings directly for Mongo array filtering (hacky solution; needs a proper fix later, see #553).
The MemoryRepository implementation is broken (it does not apply the array filter); see #554.
  • Loading branch information
johnml1135 committed Dec 4, 2024
1 parent 69fa745 commit a6650e5
Show file tree
Hide file tree
Showing 25 changed files with 417 additions and 114 deletions.
2 changes: 1 addition & 1 deletion docker-compose.mongo.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ services:
[
'/bin/sh',
'-c',
'mongod --quiet --replSet myRS --bind_ip 0.0.0.0 & sleep 2s; mongosh --host localhost:27017 --eval '' config = { "_id" : "myRS", "members" : [{"_id" : 0,"host" : "mongo:27017"}] }; rs.initiate(config, { force: true }); '' ; sleep infinity'
'mongod --profile=2 --replSet myRS --bind_ip 0.0.0.0 & sleep 2s; mongosh --host localhost:27017 --eval '' config = { "_id" : "myRS", "members" : [{"_id" : 0,"host" : "mongo:27017"}] }; rs.initiate(config, { force: true }); '' ; sleep infinity'
]
1 change: 1 addition & 0 deletions src/DataAccess/src/SIL.DataAccess/ArrayPosition.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ public static class ArrayPosition
{
// Sentinel array indexes. They are written where a real index would go in a field
// expression and are later swapped for positional operators when the field name is rendered.

/// <summary>
/// Placeholder index that the field-name renderer replaces with <c>$</c>.
/// </summary>
public const int FirstMatching = int.MaxValue;

/// <summary>
/// Placeholder index that the field-name renderer replaces with <c>$[]</c>.
/// </summary>
public const int All = int.MaxValue - 1;

/// <summary>
/// Placeholder index that the field-name renderer replaces with <c>$[arrayFilter]</c>.
/// NOTE(review): the in-memory update builder treats this the same as <see cref="All"/>
/// (no filtering is applied there).
/// </summary>
public const int ArrayFilter = int.MaxValue - 2;
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ LinqProvider linqProvider
linqProvider
);
string fieldName = rendered.FieldName.Replace(ArrayPosition.All.ToString(CultureInfo.InvariantCulture), "$[]");
fieldName = fieldName.Replace(
ArrayPosition.ArrayFilter.ToString(CultureInfo.InvariantCulture),
"$[arrayFilter]"
);
fieldName = fieldName.Replace(ArrayPosition.FirstMatching.ToString(CultureInfo.InvariantCulture), "$");
if (fieldName != rendered.FieldName)
{
Expand Down
8 changes: 8 additions & 0 deletions src/DataAccess/src/SIL.DataAccess/IRepository.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,17 @@ public interface IRepository<T>
bool returnOriginal = false,
CancellationToken cancellationToken = default
);
/// <summary>
/// Updates all entities matching <paramref name="filter"/>, supplying an array filter as a raw JSON string.
/// </summary>
/// <typeparam name="TFilter">
/// The type the JSON array filter definition is typed against
/// (presumably the array element type - TODO confirm against callers).
/// </typeparam>
/// <param name="filter">Predicate selecting the entities to update.</param>
/// <param name="update">Callback that describes the update through an <see cref="IUpdateBuilder{T}"/>.</param>
/// <param name="jsonArrayFilterDefinition">
/// The array filter as a JSON string. NOTE(review): the Mongo implementation turns this into an array
/// filter on the update options, while the in-memory implementation currently ignores it.
/// </param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
/// <returns>Presumably the number of entities updated - TODO confirm against the implementations.</returns>
Task<int> UpdateAllAsync<TFilter>(
Expression<Func<T, bool>> filter,
Action<IUpdateBuilder<T>> update,
string jsonArrayFilterDefinition,
CancellationToken cancellationToken = default
);

/// <summary>
/// Updates all entities matching <paramref name="filter"/>.
/// </summary>
/// <param name="filter">Predicate selecting the entities to update.</param>
/// <param name="update">Callback that describes the update through an <see cref="IUpdateBuilder{T}"/>.</param>
/// <param name="updateOptions">
/// Optional update options; <c>null</c> by default. NOTE(review): the Mongo implementation forwards
/// these to <c>UpdateManyAsync</c>; confirm how other implementations treat them.
/// </param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
/// <returns>Presumably the number of entities updated - TODO confirm against the implementations.</returns>
Task<int> UpdateAllAsync(
Expression<Func<T, bool>> filter,
Action<IUpdateBuilder<T>> update,
UpdateOptions? updateOptions = null,
CancellationToken cancellationToken = default
);
Task<T?> DeleteAsync(Expression<Func<T, bool>> filter, CancellationToken cancellationToken = default);
Expand Down
11 changes: 11 additions & 0 deletions src/DataAccess/src/SIL.DataAccess/MemoryRepository.cs
Original file line number Diff line number Diff line change
Expand Up @@ -233,9 +233,20 @@ public async Task InsertAllAsync(IReadOnlyCollection<T> entities, CancellationTo
return returnOriginal ? original : entity;
}

/// <summary>
/// Updates all entities matching <paramref name="filter"/> by delegating to the overload that takes
/// update options.
/// </summary>
/// <remarks>
/// <paramref name="jsonArrayFilterDefinition"/> is not applied here: it is dropped and the update
/// runs with no update options.
/// </remarks>
/// <typeparam name="TFilter">Unused by this implementation.</typeparam>
/// <param name="filter">Predicate selecting the entities to update.</param>
/// <param name="update">Callback that describes the update.</param>
/// <param name="jsonArrayFilterDefinition">Ignored by this implementation.</param>
/// <param name="cancellationToken">Token forwarded to the delegated overload.</param>
/// <returns>Whatever the delegated overload returns.</returns>
public async Task<int> UpdateAllAsync<TFilter>(
    Expression<Func<T, bool>> filter,
    Action<IUpdateBuilder<T>> update,
    string jsonArrayFilterDefinition,
    CancellationToken cancellationToken = default
)
{
    // Naming the trailing arguments makes it explicit that the non-generic overload is the target.
    int updatedCount = await UpdateAllAsync(
        filter,
        update,
        updateOptions: null,
        cancellationToken: cancellationToken
    );
    return updatedCount;
}

public async Task<int> UpdateAllAsync(
Expression<Func<T, bool>> filter,
Action<IUpdateBuilder<T>> update,
UpdateOptions? updateOptions = null,
CancellationToken cancellationToken = default
)
{
Expand Down
2 changes: 2 additions & 0 deletions src/DataAccess/src/SIL.DataAccess/MemoryUpdateBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,8 @@ Expression<Func<T, TField>> field
}
break;
case ArrayPosition.All:
// This doesn't filter as it should - but it's good enough for unit testing.
case ArrayPosition.ArrayFilter:
newOwners.AddRange(((IEnumerable)owner).Cast<object>());
break;
default:
Expand Down
25 changes: 23 additions & 2 deletions src/DataAccess/src/SIL.DataAccess/MongoRepository.cs
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,24 @@ await _collection
return entity;
}

/// <summary>
/// Updates all entities matching <paramref name="filter"/>, applying a single array filter that is
/// supplied as a raw JSON string.
/// </summary>
/// <typeparam name="TFilter">The type used to build the <c>JsonArrayFilterDefinition</c>.</typeparam>
/// <param name="filter">Predicate selecting the entities to update.</param>
/// <param name="update">Callback that describes the update.</param>
/// <param name="jsonArrayFilterDefinition">
/// JSON text of the array filter. NOTE(review): it presumably has to use the identifier
/// <c>arrayFilter</c>, because <c>ArrayPosition.ArrayFilter</c> is rendered as <c>$[arrayFilter]</c>
/// in field names - confirm against callers.
/// </param>
/// <param name="cancellationToken">Token forwarded to the delegated overload.</param>
/// <returns>Whatever the delegated overload returns.</returns>
public async Task<int> UpdateAllAsync<TFilter>(
    Expression<Func<T, bool>> filter,
    Action<IUpdateBuilder<T>> update,
    string jsonArrayFilterDefinition,
    CancellationToken cancellationToken = default
)
{
    var updateOptions = new UpdateOptions
    {
        ArrayFilters = [new JsonArrayFilterDefinition<TFilter>(jsonArrayFilterDefinition)]
    };

    // ConfigureAwait(false) matches the other awaits in this repository's update path.
    return await UpdateAllAsync(filter, update, updateOptions, cancellationToken).ConfigureAwait(false);
}

public async Task<int> UpdateAllAsync(
Expression<Func<T, bool>> filter,
Action<IUpdateBuilder<T>> update,
UpdateOptions? updateOptions = null,
CancellationToken cancellationToken = default
)
{
Expand All @@ -167,13 +182,19 @@ public async Task<int> UpdateAllAsync(
if (_context.Session is not null)
{
result = await _collection
.UpdateManyAsync(_context.Session, filter, updateDef, cancellationToken: cancellationToken)
.UpdateManyAsync(
_context.Session,
filter,
updateDef,
updateOptions,
cancellationToken: cancellationToken
)
.ConfigureAwait(false);
}
else
{
result = await _collection
.UpdateManyAsync(filter, updateDef, cancellationToken: cancellationToken)
.UpdateManyAsync(filter, updateDef, updateOptions, cancellationToken: cancellationToken)
.ConfigureAwait(false);
}
}
Expand Down
22 changes: 12 additions & 10 deletions src/DataAccess/src/SIL.DataAccess/MongoUpdateBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,36 +3,38 @@ namespace SIL.DataAccess;
public class MongoUpdateBuilder<T> : IUpdateBuilder<T>
where T : IEntity
{
private readonly UpdateDefinitionBuilder<T> _builder;
private readonly UpdateDefinitionBuilder<T> _updateBuilder;
private readonly FilterDefinitionBuilder<T> _filterBuilder;
private readonly List<UpdateDefinition<T>> _defs;

public MongoUpdateBuilder()
{
_builder = Builders<T>.Update;
_updateBuilder = Builders<T>.Update;
_filterBuilder = Builders<T>.Filter;
_defs = new List<UpdateDefinition<T>>();
}

public IUpdateBuilder<T> Set<TField>(Expression<Func<T, TField>> field, TField value)
{
_defs.Add(_builder.Set(ToFieldDefinition(field), value));
_defs.Add(_updateBuilder.Set(ToFieldDefinition(field), value));
return this;
}

public IUpdateBuilder<T> SetOnInsert<TField>(Expression<Func<T, TField>> field, TField value)
{
_defs.Add(_builder.SetOnInsert(ToFieldDefinition(field), value));
_defs.Add(_updateBuilder.SetOnInsert(ToFieldDefinition(field), value));
return this;
}

public IUpdateBuilder<T> Unset<TField>(Expression<Func<T, TField>> field)
{
_defs.Add(_builder.Unset(ToFieldDefinition(field)));
_defs.Add(_updateBuilder.Unset(ToFieldDefinition(field)));
return this;
}

public IUpdateBuilder<T> Inc(Expression<Func<T, int>> field, int value = 1)
{
_defs.Add(_builder.Inc(ToFieldDefinition(field), value));
_defs.Add(_updateBuilder.Inc(ToFieldDefinition(field), value));
return this;
}

Expand All @@ -41,27 +43,27 @@ public IUpdateBuilder<T> RemoveAll<TItem>(
Expression<Func<TItem, bool>> predicate
)
{
_defs.Add(_builder.PullFilter(ToFieldDefinition(field), Builders<TItem>.Filter.Where(predicate)));
_defs.Add(_updateBuilder.PullFilter(ToFieldDefinition(field), Builders<TItem>.Filter.Where(predicate)));
return this;
}

public IUpdateBuilder<T> Remove<TItem>(Expression<Func<T, IEnumerable<TItem>?>> field, TItem value)
{
_defs.Add(_builder.Pull(ToFieldDefinition(field), value));
_defs.Add(_updateBuilder.Pull(ToFieldDefinition(field), value));
return this;
}

public IUpdateBuilder<T> Add<TItem>(Expression<Func<T, IEnumerable<TItem>?>> field, TItem value)
{
_defs.Add(_builder.Push(ToFieldDefinition(field), value));
_defs.Add(_updateBuilder.Push(ToFieldDefinition(field), value));
return this;
}

public UpdateDefinition<T> Build()
{
if (_defs.Count == 1)
return _defs.Single();
return _builder.Combine(_defs);
return _updateBuilder.Combine(_defs);
}

private static FieldDefinition<T, TField> ToFieldDefinition<TField>(Expression<Func<T, TField>> field)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ private async Task ReleaseAllWriterLocksAsync(CancellationToken cancellationToke
await _locks.UpdateAllAsync(
rwl => rwl.WriterLock != null && rwl.WriterLock.HostId == _serviceOptions.ServiceId,
u => u.Unset(rwl => rwl.WriterLock),
cancellationToken
cancellationToken: cancellationToken
);
}

Expand All @@ -64,7 +64,7 @@ private async Task ReleaseAllReaderLocksAsync(CancellationToken cancellationToke
await _locks.UpdateAllAsync(
rwl => rwl.ReaderLocks.Any(l => l.HostId == _serviceOptions.ServiceId),
u => u.RemoveAll(rwl => rwl.ReaderLocks, l => l.HostId == _serviceOptions.ServiceId),
cancellationToken
cancellationToken: cancellationToken
);
}

Expand All @@ -73,7 +73,7 @@ private async Task RemoveAllWaitersAsync(CancellationToken cancellationToken)
await _locks.UpdateAllAsync(
rwl => rwl.WriterQueue.Any(l => l.HostId == _serviceOptions.ServiceId),
u => u.RemoveAll(rwl => rwl.WriterQueue, l => l.HostId == _serviceOptions.ServiceId),
cancellationToken
cancellationToken: cancellationToken
);
}
}
60 changes: 30 additions & 30 deletions src/Serval/src/Serval.Client/Client.g.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7073,9 +7073,37 @@ public partial class Corpus
[System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")]
public partial class CorpusFile
{
[Newtonsoft.Json.JsonProperty("file", Required = Newtonsoft.Json.Required.Always)]
[Newtonsoft.Json.JsonProperty("fileId", Required = Newtonsoft.Json.Required.Always)]
[System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)]
public string FileId { get; set; } = default!;

[Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)]
public string? TextId { get; set; } = default!;

}

[System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")]
public partial class CorpusConfig
{
[Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)]
public string? Name { get; set; } = default!;

[Newtonsoft.Json.JsonProperty("language", Required = Newtonsoft.Json.Required.Always)]
[System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)]
public string Language { get; set; } = default!;

[Newtonsoft.Json.JsonProperty("files", Required = Newtonsoft.Json.Required.Always)]
[System.ComponentModel.DataAnnotations.Required]
public DataFile File { get; set; } = new DataFile();
public System.Collections.Generic.IList<CorpusFileConfig> Files { get; set; } = new System.Collections.ObjectModel.Collection<CorpusFileConfig>();

}

[System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")]
public partial class CorpusFileConfig
{
[Newtonsoft.Json.JsonProperty("fileId", Required = Newtonsoft.Json.Required.Always)]
[System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)]
public string FileId { get; set; } = default!;

[Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)]
public string? TextId { get; set; } = default!;
Expand Down Expand Up @@ -7118,34 +7146,6 @@ public enum FileFormat

}

[System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")]
public partial class CorpusConfig
{
[Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)]
public string? Name { get; set; } = default!;

[Newtonsoft.Json.JsonProperty("language", Required = Newtonsoft.Json.Required.Always)]
[System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)]
public string Language { get; set; } = default!;

[Newtonsoft.Json.JsonProperty("files", Required = Newtonsoft.Json.Required.Always)]
[System.ComponentModel.DataAnnotations.Required]
public System.Collections.Generic.IList<CorpusFileConfig> Files { get; set; } = new System.Collections.ObjectModel.Collection<CorpusFileConfig>();

}

[System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")]
public partial class CorpusFileConfig
{
[Newtonsoft.Json.JsonProperty("fileId", Required = Newtonsoft.Json.Required.Always)]
[System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)]
public string FileId { get; set; } = default!;

[Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)]
public string? TextId { get; set; } = default!;

}

[System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")]
public partial class TranslationEngine
{
Expand Down
28 changes: 17 additions & 11 deletions src/Serval/src/Serval.DataFiles/Consumers/GetCorpusConsumer.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
namespace Serval.DataFiles.Consumers;

public class GetCorpusConsumer(ICorpusService corpusService) : IConsumer<GetCorpus>
public class GetCorpusConsumer(ICorpusService corpusService, IDataFileService dataFileService) : IConsumer<GetCorpus>
{
private readonly ICorpusService _corpusService = corpusService;
private readonly IDataFileService _dataFileService = dataFileService;

public async Task Consume(ConsumeContext<GetCorpus> context)
{
Expand All @@ -19,19 +20,13 @@ await context.RespondAsync(
CorpusId = corpus.Id,
Name = corpus.Name,
Language = corpus.Language,
Files = corpus
.Files.Select(f => new CorpusFileResult
Files = await Task.WhenAll(
corpus.Files.Select(async f => new CorpusFileResult
{
TextId = f.TextId!,
File = new DataFileResult
{
DataFileId = f.File.Id,
Filename = f.File.Filename,
Format = f.File.Format,
Name = f.File.Name
}
File = Map(await _dataFileService.GetAsync(f.FileId))
})
.ToList()
)
}
);
}
Expand All @@ -42,4 +37,15 @@ await context.RespondAsync(
);
}
}

/// <summary>
/// Builds a <see cref="DataFileResult"/> from a <see cref="DataFile"/> by copying its id, name,
/// filename and format.
/// </summary>
/// <param name="dataFile">The data file to convert.</param>
/// <returns>A new <see cref="DataFileResult"/> carrying the copied values.</returns>
private static DataFileResult Map(DataFile dataFile) =>
    new()
    {
        DataFileId = dataFile.Id,
        Name = dataFile.Name,
        Filename = dataFile.Filename,
        Format = dataFile.Format,
    };
}
2 changes: 1 addition & 1 deletion src/Serval/src/Serval.DataFiles/Contracts/CorpusFileDto.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ namespace Serval.DataFiles.Contracts;

public record CorpusFileDto
{
public required DataFileDto File { get; init; }
public required string FileId { get; init; }
public string? TextId { get; init; }
}
16 changes: 2 additions & 14 deletions src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ CancellationToken cancellationToken
DataFile? dataFile = await _dataFileService.GetAsync(file.FileId, cancellationToken);
if (dataFile == null)
throw new InvalidOperationException($"DataFile with id {file.FileId} does not exist.");
dataFiles.Add(new CorpusFile { File = dataFile, TextId = file.TextId });
dataFiles.Add(new CorpusFile { FileId = file.FileId, TextId = file.TextId });
}
return dataFiles;
}
Expand All @@ -197,18 +197,6 @@ private CorpusDto Map(Corpus source)

private CorpusFileDto Map(CorpusFile source)
{
return new CorpusFileDto { File = Map(source.File), TextId = source.TextId };
}

private DataFileDto Map(DataFile source)
{
return new DataFileDto
{
Id = source.Id,
Url = _urlService.GetUrl(Endpoints.GetDataFile, new { id = source.Id }),
Name = source.Name,
Format = source.Format,
Revision = source.Revision
};
return new CorpusFileDto { FileId = source.FileId, TextId = source.TextId };
}
}
2 changes: 1 addition & 1 deletion src/Serval/src/Serval.DataFiles/Models/CorpusFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ namespace Serval.DataFiles.Models;

public record CorpusFile
{
public required DataFile File { get; init; }
public required string FileId { get; init; }
public string? TextId { get; init; }
}
Loading

0 comments on commit a6650e5

Please sign in to comment.