From ae60963a681b9f24910e15b039999b985158821f Mon Sep 17 00:00:00 2001 From: Robin Munn Date: Mon, 9 Dec 2024 11:46:06 +0700 Subject: [PATCH 1/3] Add "ldmlzip" command to hg command runner Will return 403 Forbidden if project does not allow sharing ws data with SLDR. Will also return same 403 Forbidden error code if project does not exist, to avoid possibly leaking project codes. If project exists and allows data sharing, command will return a zipfile containing CachedSettings/WritingSystems/*.ldml from the tip revision. --- hgweb/command-runner.sh | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/hgweb/command-runner.sh b/hgweb/command-runner.sh index 5ac78be22..4d6e6a6ee 100644 --- a/hgweb/command-runner.sh +++ b/hgweb/command-runner.sh @@ -1,7 +1,7 @@ #!/bin/bash # Define the list of allowed commands -allowed_commands=("verify" "tip" "tipdate" "reposizeinkb" "wesaylexentrycount" "lexentrycount" "flexprojectid" "flexwritingsystems" "flexmodelversion" "recover" "healthz" "invalidatedircache") +allowed_commands=("verify" "tip" "tipdate" "ldmlzip" "reposizeinkb" "wesaylexentrycount" "lexentrycount" "flexprojectid" "flexwritingsystems" "flexmodelversion" "recover" "healthz" "invalidatedircache") # Get the project code and command name from the URL IFS='/' read -ra PATH_SEGMENTS <<< "$PATH_INFO" @@ -9,7 +9,7 @@ project_code="${PATH_SEGMENTS[1]}" command_name="${PATH_SEGMENTS[2]}" # Ensure the project code and command name are safe to use in a shell command -if [[ ! $project_code =~ ^[a-z0-9][a-z0-9-]*$ ]] || [[ ! $command_name =~ ^[a-zA-Z0-9]+$ ]]; then +if [[ ! "$project_code" =~ ^[a-z0-9][a-z0-9-]*$ ]] || [[ ! "$command_name" =~ ^[a-zA-Z0-9]+$ ]]; then echo "Content-type: text/plain" echo "Status: 400 Bad Request" echo "" @@ -38,8 +38,22 @@ if [[ $command_name == "healthz" ]]; then exit 0 fi +if [[ $command_name == "ldmlzip" ]]; then + # Preflight check: ldml zip access is only allowed if LexiconSettings.plsx contains addToSldr="true" + if (chg -R /var/hg/repos/$first_char/$project_code cat -r tip CachedSettings/SharedSettings/LexiconSettings.plsx | grep ' Date: Wed, 15 Jan 2025 14:11:54 -0500 Subject: [PATCH 2/3] First attempt at streaming zip, doesn't work Alas, this fails (when a ProjectController method is added to call PrepareLdmlZip) with "System.InvalidOperationException: Synchronous operations are disallowed. Call WriteAsync or set AllowSynchronousIO to true instead." We'll have to change this to prepare the entire zip file first, then send it. --- backend/LexBoxApi/Jobs/DelayedLexJob.cs | 28 +++++++++++++++++ .../LexBoxApi/Jobs/DeleteTempDirectoryJob.cs | 30 +++++++++++++++++++ backend/LexBoxApi/ScheduledTasksKernel.cs | 1 + backend/LexBoxApi/Services/HgService.cs | 18 +++++++++++ backend/LexBoxApi/Services/ProjectService.cs | 29 +++++++++++++++++- .../LexCore/ServiceInterfaces/IHgService.cs | 1 + 6 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 backend/LexBoxApi/Jobs/DelayedLexJob.cs create mode 100644 backend/LexBoxApi/Jobs/DeleteTempDirectoryJob.cs diff --git a/backend/LexBoxApi/Jobs/DelayedLexJob.cs b/backend/LexBoxApi/Jobs/DelayedLexJob.cs new file mode 100644 index 000000000..25060d61b --- /dev/null +++ b/backend/LexBoxApi/Jobs/DelayedLexJob.cs @@ -0,0 +1,28 @@ +using System.Diagnostics; +using LexBoxApi.Otel; +using OpenTelemetry.Trace; +using Quartz; + +namespace LexBoxApi.Jobs; + +public abstract class DelayedLexJob() : LexJob +{ + protected static async Task QueueJob(ISchedulerFactory schedulerFactory, + JobKey key, + JobDataMap data, + TimeSpan delay, + CancellationToken cancellationToken = default) + { + var now = DateTime.UtcNow; + data[nameof(JobTriggerTraceId)] = Activity.Current?.Context.TraceId.ToHexString() ?? string.Empty; + data[nameof(JobTriggerSpanParentId)] = Activity.Current?.Context.SpanId.ToHexString() ?? string.Empty; + var trigger = TriggerBuilder.Create() + .WithIdentity(key.Name + "_Trigger", key.Group) + .StartAt(now.Add(delay)) + .ForJob(key.Name, key.Group) + .UsingJobData(data) + .Build(); + var scheduler = await schedulerFactory.GetScheduler(cancellationToken); + await scheduler.ScheduleJob(trigger, cancellationToken); + } +} diff --git a/backend/LexBoxApi/Jobs/DeleteTempDirectoryJob.cs b/backend/LexBoxApi/Jobs/DeleteTempDirectoryJob.cs new file mode 100644 index 000000000..3c8306349 --- /dev/null +++ b/backend/LexBoxApi/Jobs/DeleteTempDirectoryJob.cs @@ -0,0 +1,30 @@ +using Quartz; + +namespace LexBoxApi.Jobs; + +public class DeleteTempDirectoryJob() : DelayedLexJob +{ + public static async Task Queue(ISchedulerFactory schedulerFactory, + string path, + TimeSpan delay, + CancellationToken cancellationToken = default) + { + await QueueJob(schedulerFactory, + Key, + new JobDataMap { { nameof(Path), path } }, + delay, + cancellationToken); + } + + public static JobKey Key { get; } = new(nameof(DeleteTempDirectoryJob), "CleanupJobs"); + public string? Path { get; set; } + + protected override Task ExecuteJob(IJobExecutionContext context) + { + ArgumentException.ThrowIfNullOrEmpty(Path); + return Task.Run(() => + { + if (Directory.Exists(Path)) Directory.Delete(Path, true); + }); + } +} diff --git a/backend/LexBoxApi/ScheduledTasksKernel.cs b/backend/LexBoxApi/ScheduledTasksKernel.cs index 572cc7b35..45372a712 100644 --- a/backend/LexBoxApi/ScheduledTasksKernel.cs +++ b/backend/LexBoxApi/ScheduledTasksKernel.cs @@ -38,6 +38,7 @@ public static void AddScheduledTasks(this IServiceCollection services, IConfigur //Setup jobs q.AddJob(CleanupResetBackupJob.Key); + q.AddJob(DeleteTempDirectoryJob.Key, j => j.StoreDurably()); q.AddJob(UpdateProjectMetadataJob.Key, j => j.StoreDurably()); q.AddJob(RetryEmailJob.Key, j => j.StoreDurably()); q.AddTrigger(opts => opts.ForJob(CleanupResetBackupJob.Key) diff --git a/backend/LexBoxApi/Services/HgService.cs b/backend/LexBoxApi/Services/HgService.cs index 32e328943..8798dc518 100644 --- a/backend/LexBoxApi/Services/HgService.cs +++ b/backend/LexBoxApi/Services/HgService.cs @@ -1,5 +1,6 @@ using System.Diagnostics; using System.IO.Compression; +using System.Net; using System.Net.Http.Headers; using System.Runtime.InteropServices; using System.Text; @@ -491,6 +492,13 @@ public async Task HgCommandHealth() return version.Trim(); } + public async Task GetLdmlZip(ProjectCode code, CancellationToken token = default) + { + var content = await ExecuteHgCommandServerCommand_ErrorsOk(code, "ldmlzip", [HttpStatusCode.Forbidden], token); + if (content is null) return null; + return new ZipArchive(await content.ReadAsStreamAsync(token), ZipArchiveMode.Read); + } + private async Task ExecuteHgCommandServerCommand(ProjectCode code, string command, CancellationToken token) { var httpClient = _hgClient.Value; @@ -500,6 +508,16 @@ private async Task ExecuteHgCommandServerCommand(ProjectCode code, return response.Content; } + private async Task ExecuteHgCommandServerCommand_ErrorsOk(ProjectCode code, string command, IEnumerable okErrors, CancellationToken token) + { + var httpClient = _hgClient.Value; + var baseUri = _options.Value.HgCommandServer; + var response = await httpClient.GetAsync($"{baseUri}{code}/{command}", HttpCompletionOption.ResponseHeadersRead, token); + if (okErrors.Contains(response.StatusCode)) return null; + response.EnsureSuccessStatusCode(); + return response.Content; + } + public async Task DetermineProjectType(ProjectCode projectCode) { var response = await GetResponseMessage(projectCode, "file/tip?style=json-lex"); diff --git a/backend/LexBoxApi/Services/ProjectService.cs b/backend/LexBoxApi/Services/ProjectService.cs index 095f43582..deb983c89 100644 --- a/backend/LexBoxApi/Services/ProjectService.cs +++ b/backend/LexBoxApi/Services/ProjectService.cs @@ -1,4 +1,6 @@ using System.Data.Common; +using System.IO.Compression; +using LexBoxApi.Jobs; using LexBoxApi.Models.Project; using LexBoxApi.Services.Email; using LexCore.Auth; @@ -13,7 +15,7 @@ namespace LexBoxApi.Services; -public class ProjectService(LexBoxDbContext dbContext, IHgService hgService, IOptions hgConfig, IMemoryCache memoryCache, IEmailService emailService) +public class ProjectService(LexBoxDbContext dbContext, IHgService hgService, IOptions hgConfig, IMemoryCache memoryCache, IEmailService emailService, Quartz.ISchedulerFactory schedulerFactory) { public async Task CreateProject(CreateProjectInput input) { @@ -269,6 +271,31 @@ public async Task ResetLexEntryCount(string projectCode) } } + public async Task ExtractLdmlZip(Project project, string destRoot, CancellationToken token = default) + { + if (project.Type != ProjectType.FLEx) return null; + var zip = await hgService.GetLdmlZip(project.Code, token); + if (zip is null) return null; + var path = System.IO.Path.Join(destRoot, project.Id.ToString()); + if (Directory.Exists(path)) Directory.Delete(path, true); + var dirInfo = Directory.CreateDirectory(path); + zip.ExtractToDirectory(dirInfo.FullName, true); + return dirInfo; + } + + public async Task PrepareLdmlZip(Stream outStream, CancellationToken token = default) + { + var path = System.IO.Path.Join(System.IO.Path.GetTempPath(), "ldml-zip"); // TODO: pick random name, rather than predictable one + if (Directory.Exists(path)) Directory.Delete(path, true); + Directory.CreateDirectory(path); + await DeleteTempDirectoryJob.Queue(schedulerFactory, path, TimeSpan.FromHours(4)); + await foreach (var project in dbContext.Projects.Where(p => p.Type == ProjectType.FLEx).AsAsyncEnumerable()) + { + await ExtractLdmlZip(project, path, token); + } + ZipFile.CreateFromDirectory(path, outStream, CompressionLevel.Fastest, includeBaseDirectory: false); + } + public async Task UpdateLastCommit(string projectCode) { var project = await dbContext.Projects.FirstOrDefaultAsync(p => p.Code == projectCode); diff --git a/backend/LexCore/ServiceInterfaces/IHgService.cs b/backend/LexCore/ServiceInterfaces/IHgService.cs index d21347e9c..e3d513ac4 100644 --- a/backend/LexCore/ServiceInterfaces/IHgService.cs +++ b/backend/LexCore/ServiceInterfaces/IHgService.cs @@ -22,6 +22,7 @@ public interface IHgService Task GetRepoSizeInKb(ProjectCode code, CancellationToken token = default); Task GetLexEntryCount(ProjectCode code, ProjectType projectType); Task GetRepositoryIdentifier(Project project); + Task GetLdmlZip(ProjectCode code, CancellationToken token = default); Task ExecuteHgRecover(ProjectCode code, CancellationToken token); Task InvalidateDirCache(ProjectCode code, CancellationToken token = default); bool HasAbandonedTransactions(ProjectCode projectCode); From f8b0dba167130e0f340697e1933009fa3027f2f9 Mon Sep 17 00:00:00 2001 From: Robin Munn Date: Wed, 15 Jan 2025 15:04:16 -0500 Subject: [PATCH 3/3] Create zip file first, then return it This is less efficient, but the only method that actually works given the current state of the ZipFile / ZipArchive code in .NET. --- backend/LexBoxApi/Controllers/ProjectController.cs | 11 +++++++++++ backend/LexBoxApi/Jobs/DelayedLexJob.cs | 3 ++- backend/LexBoxApi/Services/ProjectService.cs | 11 ++++++++--- hgweb/command-runner.sh | 3 ++- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/backend/LexBoxApi/Controllers/ProjectController.cs b/backend/LexBoxApi/Controllers/ProjectController.cs index 838c61847..5176c46e3 100644 --- a/backend/LexBoxApi/Controllers/ProjectController.cs +++ b/backend/LexBoxApi/Controllers/ProjectController.cs @@ -232,6 +232,17 @@ private async Task StreamHttpResponse(HttpContent hgResult) await hgResult.CopyToAsync(writer.AsStream()); } + [HttpGet("getLdmlZip")] // TODO: Discuss endpoint name, and whether it should be GET or POST, at next opportunity + [AdminRequired] // TODO: Decide on permissions, because we don't want everyone triggering this + public async Task GetLdmlZip(CancellationToken token) + { + var path = await projectService.PrepareLdmlZip(token); + Response.Headers.ContentDisposition = "attachment;filename=\"ldml.zip\""; // TODO: Put timestamp in filename, or use the filename that PrepareLdmlZip returns once it has a timestamp in it + Response.ContentType = "application/zip"; + Response.StatusCode = 200; + await Response.SendFileAsync(path, token); + } + [HttpPost("updateMissingLanguageList")] public async Task> UpdateMissingLanguageList(int limit = 10) { diff --git a/backend/LexBoxApi/Jobs/DelayedLexJob.cs b/backend/LexBoxApi/Jobs/DelayedLexJob.cs index 25060d61b..59079cd9a 100644 --- a/backend/LexBoxApi/Jobs/DelayedLexJob.cs +++ b/backend/LexBoxApi/Jobs/DelayedLexJob.cs @@ -17,7 +17,8 @@ protected static async Task QueueJob(ISchedulerFactory schedulerFactory, data[nameof(JobTriggerTraceId)] = Activity.Current?.Context.TraceId.ToHexString() ?? string.Empty; data[nameof(JobTriggerSpanParentId)] = Activity.Current?.Context.SpanId.ToHexString() ?? string.Empty; var trigger = TriggerBuilder.Create() - .WithIdentity(key.Name + "_Trigger", key.Group) + // TODO: Is there a simpler way of telling Quartz "Hey, enqueue this job after X delay"? Picking a unique trigger name each time seems unnecessarily complicated. + .WithIdentity(key.Name + "_Trigger_" + now.Ticks.ToString(), key.Group) .StartAt(now.Add(delay)) .ForJob(key.Name, key.Group) .UsingJobData(data) diff --git a/backend/LexBoxApi/Services/ProjectService.cs b/backend/LexBoxApi/Services/ProjectService.cs index deb983c89..4a0a82f2b 100644 --- a/backend/LexBoxApi/Services/ProjectService.cs +++ b/backend/LexBoxApi/Services/ProjectService.cs @@ -283,17 +283,22 @@ public async Task ResetLexEntryCount(string projectCode) return dirInfo; } - public async Task PrepareLdmlZip(Stream outStream, CancellationToken token = default) + public async Task PrepareLdmlZip(CancellationToken token = default) { var path = System.IO.Path.Join(System.IO.Path.GetTempPath(), "ldml-zip"); // TODO: pick random name, rather than predictable one if (Directory.Exists(path)) Directory.Delete(path, true); Directory.CreateDirectory(path); await DeleteTempDirectoryJob.Queue(schedulerFactory, path, TimeSpan.FromHours(4)); + var zipRoot = System.IO.Path.Join(path, "zipRoot"); + Directory.CreateDirectory(zipRoot); await foreach (var project in dbContext.Projects.Where(p => p.Type == ProjectType.FLEx).AsAsyncEnumerable()) { - await ExtractLdmlZip(project, path, token); + await ExtractLdmlZip(project, zipRoot, token); } - ZipFile.CreateFromDirectory(path, outStream, CompressionLevel.Fastest, includeBaseDirectory: false); + var zipFilePath = System.IO.Path.Join(path, "ldml.zip"); // TODO: Put timestamp in there + if (File.Exists(zipFilePath)) File.Delete(zipFilePath); + ZipFile.CreateFromDirectory(zipRoot, zipFilePath, CompressionLevel.Fastest, includeBaseDirectory: false); + return zipFilePath; } public async Task UpdateLastCommit(string projectCode) diff --git a/hgweb/command-runner.sh b/hgweb/command-runner.sh index 4d6e6a6ee..9eeb805b3 100644 --- a/hgweb/command-runner.sh +++ b/hgweb/command-runner.sh @@ -40,7 +40,8 @@ fi if [[ $command_name == "ldmlzip" ]]; then # Preflight check: ldml zip access is only allowed if LexiconSettings.plsx contains addToSldr="true" - if (chg -R /var/hg/repos/$first_char/$project_code cat -r tip CachedSettings/SharedSettings/LexiconSettings.plsx | grep '/dev/null); then CONTENT_TYPE="application/zip" else echo "Content-type: text/plain"