Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New API for downloading LDML files from projects that allow sharing WS data #1309

Merged
merged 15 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions backend/LexBoxApi/Controllers/ProjectController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,17 @@ private async Task StreamHttpResponse(HttpContent hgResult)
await hgResult.CopyToAsync(writer.AsStream());
}

[HttpGet("getLdmlZip")] // TODO: Discuss endpoint name, and whether it should be GET or POST, at next opportunity
hahn-kev marked this conversation as resolved.
Show resolved Hide resolved
[AdminRequired] // TODO: Decide on permissions, because we don't want everyone triggering this
hahn-kev marked this conversation as resolved.
Show resolved Hide resolved
public async Task GetLdmlZip(CancellationToken token)
{
    // Ask the project service to build the zip on disk; it hands back the path of the finished file.
    var zipFilePath = await projectService.PrepareLdmlZip(token);

    // Status and headers are assigned before any body bytes are sent.
    Response.StatusCode = 200;
    Response.ContentType = "application/zip";
    // TODO: Put timestamp in filename, or use the filename that PrepareLdmlZip returns once it has a timestamp in it
    Response.Headers.ContentDisposition = "attachment;filename=\"ldml.zip\"";

    // Send the prepared file as the response body.
    await Response.SendFileAsync(zipFilePath, token);
}

[HttpPost("updateMissingLanguageList")]
public async Task<ActionResult<string[]>> UpdateMissingLanguageList(int limit = 10)
{
Expand Down
29 changes: 29 additions & 0 deletions backend/LexBoxApi/Jobs/DelayedLexJob.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
using System.Diagnostics;
using LexBoxApi.Otel;
using OpenTelemetry.Trace;
using Quartz;

namespace LexBoxApi.Jobs;

/// <summary>
/// Base class for jobs that are fired once, after a caller-chosen delay.
/// </summary>
public abstract class DelayedLexJob() : LexJob
{
    /// <summary>
    /// Schedules a one-shot trigger that runs the job identified by <paramref name="key"/>
    /// once <paramref name="delay"/> has elapsed.
    /// </summary>
    /// <param name="schedulerFactory">Factory used to obtain the Quartz scheduler.</param>
    /// <param name="key">Key of the job to run; its name and group are also used for the trigger.</param>
    /// <param name="data">
    /// Job data handed to the trigger. This map is modified in place: the current
    /// <see cref="Activity"/>'s trace id and span id (or empty strings when there is no current activity)
    /// are written into it before scheduling.
    /// </param>
    /// <param name="delay">How long from now (UTC) the trigger should start.</param>
    /// <param name="cancellationToken">Token passed to the scheduler calls.</param>
    protected static async Task QueueJob(ISchedulerFactory schedulerFactory,
        JobKey key,
        JobDataMap data,
        TimeSpan delay,
        CancellationToken cancellationToken = default)
    {
        var now = DateTime.UtcNow;
        data[nameof(JobTriggerTraceId)] = Activity.Current?.Context.TraceId.ToHexString() ?? string.Empty;
        data[nameof(JobTriggerSpanParentId)] = Activity.Current?.Context.SpanId.ToHexString() ?? string.Empty;
        var trigger = TriggerBuilder.Create()
            // Every queued run needs its own trigger identity. A clock-tick suffix can repeat when two
            // jobs are queued within the same tick, so a GUID is used to guarantee uniqueness.
            .WithIdentity($"{key.Name}_Trigger_{Guid.NewGuid():N}", key.Group)
            .StartAt(now.Add(delay))
            .ForJob(key.Name, key.Group)
            .UsingJobData(data)
            .Build();
        var scheduler = await schedulerFactory.GetScheduler(cancellationToken);
        await scheduler.ScheduleJob(trigger, cancellationToken);
    }
}
30 changes: 30 additions & 0 deletions backend/LexBoxApi/Jobs/DeleteTempDirectoryJob.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
using Quartz;

namespace LexBoxApi.Jobs;

// Delayed job that recursively deletes the directory named by its Path value, if that directory exists.
public class DeleteTempDirectoryJob() : DelayedLexJob
{
    public static JobKey Key { get; } = new(nameof(DeleteTempDirectoryJob), "CleanupJobs");

    // Directory to delete. Queue() stores it in the job data under this property's name;
    // presumably the scheduler copies it back onto the job instance before ExecuteJob runs (TODO confirm).
    public string? Path { get; set; }

    // Queues a deletion of `path` to run once `delay` has passed.
    public static async Task Queue(ISchedulerFactory schedulerFactory,
        string path,
        TimeSpan delay,
        CancellationToken cancellationToken = default)
    {
        var jobData = new JobDataMap();
        jobData[nameof(Path)] = path;
        await QueueJob(schedulerFactory, Key, jobData, delay, cancellationToken);
    }

    // Throws if Path is null or empty; otherwise removes the directory (and its contents) on a thread-pool thread.
    protected override Task ExecuteJob(IJobExecutionContext context)
    {
        ArgumentException.ThrowIfNullOrEmpty(Path);
        return Task.Run(() =>
        {
            if (!Directory.Exists(Path)) return;
            Directory.Delete(Path, true);
        });
    }
}
1 change: 1 addition & 0 deletions backend/LexBoxApi/ScheduledTasksKernel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public static void AddScheduledTasks(this IServiceCollection services, IConfigur

//Setup jobs
q.AddJob<CleanupResetBackupJob>(CleanupResetBackupJob.Key);
q.AddJob<DeleteTempDirectoryJob>(DeleteTempDirectoryJob.Key, j => j.StoreDurably());
q.AddJob<UpdateProjectMetadataJob>(UpdateProjectMetadataJob.Key, j => j.StoreDurably());
q.AddJob<RetryEmailJob>(RetryEmailJob.Key, j => j.StoreDurably());
q.AddTrigger(opts => opts.ForJob(CleanupResetBackupJob.Key)
Expand Down
18 changes: 18 additions & 0 deletions backend/LexBoxApi/Services/HgService.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System.Diagnostics;
using System.IO.Compression;
using System.Net;
using System.Net.Http.Headers;
using System.Runtime.InteropServices;
using System.Text;
Expand Down Expand Up @@ -456,7 +457,7 @@
{
var hash = await GetTipHash(code, timeoutSource.Token);
var isEmpty = hash == AllZeroHash;
done = expectedState switch

Check warning on line 460 in backend/LexBoxApi/Services/HgService.cs

View workflow job for this annotation

GitHub Actions / Build API / publish-api

The switch expression does not handle some values of its input type (it is not exhaustive) involving an unnamed enum value. For example, the pattern '(LexBoxApi.Services.RepoEmptyState)2' is not covered.
{
RepoEmptyState.Empty => isEmpty,
RepoEmptyState.NonEmpty => !isEmpty
Expand Down Expand Up @@ -491,6 +492,13 @@
return version.Trim();
}

/// <summary>
/// Runs the <c>ldmlzip</c> command for the given project and wraps the response body in a read-only
/// <see cref="ZipArchive"/>. Returns <c>null</c> when the command helper returns no content
/// (it is told to tolerate a 403 Forbidden response).
/// </summary>
public async Task<ZipArchive?> GetLdmlZip(ProjectCode code, CancellationToken token = default)
{
    HttpStatusCode[] toleratedStatuses = [HttpStatusCode.Forbidden];
    if (await ExecuteHgCommandServerCommand_ErrorsOk(code, "ldmlzip", toleratedStatuses, token) is not { } zipContent)
    {
        return null;
    }

    var zipStream = await zipContent.ReadAsStreamAsync(token);
    return new ZipArchive(zipStream, ZipArchiveMode.Read);
}

private async Task<HttpContent> ExecuteHgCommandServerCommand(ProjectCode code, string command, CancellationToken token)
{
var httpClient = _hgClient.Value;
Expand All @@ -500,6 +508,16 @@
return response.Content;
}

/// <summary>
/// Issues a GET for <c>{HgCommandServer}{code}/{command}</c> and returns the response content,
/// except that any status code listed in <paramref name="okErrors"/> is treated as "no result".
/// </summary>
/// <param name="code">Project code placed in the request URL.</param>
/// <param name="command">Command name placed in the request URL.</param>
/// <param name="okErrors">Status codes that should produce a <c>null</c> return instead of an exception.</param>
/// <param name="token">Token passed to the HTTP request.</param>
/// <returns>The response content, or <c>null</c> when the status code is one of <paramref name="okErrors"/>.</returns>
/// <exception cref="HttpRequestException">The status code is unsuccessful and not listed in <paramref name="okErrors"/>.</exception>
private async Task<HttpContent?> ExecuteHgCommandServerCommand_ErrorsOk(ProjectCode code, string command, IEnumerable<HttpStatusCode> okErrors, CancellationToken token)
{
    var httpClient = _hgClient.Value;
    var baseUri = _options.Value.HgCommandServer;
    var response = await httpClient.GetAsync($"{baseUri}{code}/{command}", HttpCompletionOption.ResponseHeadersRead, token);
    if (okErrors.Contains(response.StatusCode))
    {
        // Only the headers have been read so far, and the caller gets nothing it could dispose.
        // Release the response here so the unread body does not keep the connection tied up.
        response.Dispose();
        return null;
    }
    response.EnsureSuccessStatusCode();
    return response.Content;
}

public async Task<ProjectType> DetermineProjectType(ProjectCode projectCode)
{
var response = await GetResponseMessage(projectCode, "file/tip?style=json-lex");
Expand Down
34 changes: 33 additions & 1 deletion backend/LexBoxApi/Services/ProjectService.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
using System.Data.Common;
using System.IO.Compression;
using LexBoxApi.Jobs;
using LexBoxApi.Models.Project;
using LexBoxApi.Services.Email;
using LexCore.Auth;
Expand All @@ -13,7 +15,7 @@

namespace LexBoxApi.Services;

public class ProjectService(LexBoxDbContext dbContext, IHgService hgService, IOptions<HgConfig> hgConfig, IMemoryCache memoryCache, IEmailService emailService)
public class ProjectService(LexBoxDbContext dbContext, IHgService hgService, IOptions<HgConfig> hgConfig, IMemoryCache memoryCache, IEmailService emailService, Quartz.ISchedulerFactory schedulerFactory)
{
public async Task<Guid> CreateProject(CreateProjectInput input)
{
Expand Down Expand Up @@ -269,6 +271,36 @@ public async Task ResetLexEntryCount(string projectCode)
}
}

/// <summary>
/// Fetches the project's LDML zip from the hg service and extracts it into a directory named after
/// the project id underneath <paramref name="destRoot"/>.
/// </summary>
/// <param name="project">Project whose LDML files should be extracted; only FLEx projects are processed.</param>
/// <param name="destRoot">Parent directory under which the per-project directory is created.</param>
/// <param name="token">Token passed to the hg service call.</param>
/// <returns>
/// The directory the files were extracted into, or <c>null</c> when the project is not a FLEx project
/// or the hg service returned no zip for it.
/// </returns>
public async Task<DirectoryInfo?> ExtractLdmlZip(Project project, string destRoot, CancellationToken token = default)
{
    if (project.Type != ProjectType.FLEx) return null;
    // The archive wraps the response stream, so dispose it once extraction is finished (or fails).
    using var zip = await hgService.GetLdmlZip(project.Code, token);
    if (zip is null) return null;
    var path = System.IO.Path.Join(destRoot, project.Id.ToString());
    // Start from an empty directory so files from an earlier extraction cannot linger.
    if (Directory.Exists(path)) Directory.Delete(path, true);
    var dirInfo = Directory.CreateDirectory(path);
    zip.ExtractToDirectory(dirInfo.FullName, true);
    return dirInfo;
}

/// <summary>
/// Collects the LDML files of every FLEx project into a single zip file inside a fresh temporary
/// directory and returns the path of that zip file.
/// </summary>
/// <param name="token">Token passed to each per-project extraction.</param>
/// <returns>Full path of the created <c>ldml.zip</c>.</returns>
/// <remarks>
/// A job is queued to delete the temporary directory (and therefore the returned zip) four hours
/// after this method starts, so callers must consume the file before then.
/// </remarks>
public async Task<string> PrepareLdmlZip(CancellationToken token = default)
{
    // Each call works in its own uniquely named directory. With a shared fixed path, a second call
    // would delete the first call's zip while it may still be streaming, and an earlier call's
    // cleanup job could remove a later call's output.
    var path = Directory.CreateTempSubdirectory("ldml-zip-").FullName;
    // Queued without the request token on purpose: the directory already exists, so its cleanup
    // should be scheduled even if the request is cancelled right after this point.
    await DeleteTempDirectoryJob.Queue(schedulerFactory, path, TimeSpan.FromHours(4));
    var zipRoot = System.IO.Path.Join(path, "zipRoot");
    Directory.CreateDirectory(zipRoot);
    await foreach (var project in dbContext.Projects.Where(p => p.Type == ProjectType.FLEx).AsAsyncEnumerable())
    {
        // Projects for which no zip is available are skipped by ExtractLdmlZip (it returns null).
        await ExtractLdmlZip(project, zipRoot, token);
    }
    var zipFilePath = System.IO.Path.Join(path, "ldml.zip"); // TODO: Put timestamp in there
    // The zip is written next to zipRoot, not inside it, so it does not end up containing itself.
    ZipFile.CreateFromDirectory(zipRoot, zipFilePath, CompressionLevel.Fastest, includeBaseDirectory: false);
    return zipFilePath;
}

public async Task<DateTimeOffset?> UpdateLastCommit(string projectCode)
{
var project = await dbContext.Projects.FirstOrDefaultAsync(p => p.Code == projectCode);
Expand Down
1 change: 1 addition & 0 deletions backend/LexCore/ServiceInterfaces/IHgService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ public interface IHgService
Task<int?> GetRepoSizeInKb(ProjectCode code, CancellationToken token = default);
Task<int?> GetLexEntryCount(ProjectCode code, ProjectType projectType);
Task<string?> GetRepositoryIdentifier(Project project);
Task<System.IO.Compression.ZipArchive?> GetLdmlZip(ProjectCode code, CancellationToken token = default);
hahn-kev marked this conversation as resolved.
Show resolved Hide resolved
Task<HttpContent> ExecuteHgRecover(ProjectCode code, CancellationToken token);
Task<HttpContent> InvalidateDirCache(ProjectCode code, CancellationToken token = default);
bool HasAbandonedTransactions(ProjectCode projectCode);
Expand Down
26 changes: 23 additions & 3 deletions hgweb/command-runner.sh
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
#!/bin/bash

# Define the list of allowed commands
allowed_commands=("verify" "tip" "tipdate" "reposizeinkb" "wesaylexentrycount" "lexentrycount" "flexprojectid" "flexwritingsystems" "flexmodelversion" "recover" "healthz" "invalidatedircache")
allowed_commands=("verify" "tip" "tipdate" "ldmlzip" "reposizeinkb" "wesaylexentrycount" "lexentrycount" "flexprojectid" "flexwritingsystems" "flexmodelversion" "recover" "healthz" "invalidatedircache")

# Get the project code and command name from the URL
IFS='/' read -ra PATH_SEGMENTS <<< "$PATH_INFO"
project_code="${PATH_SEGMENTS[1]}"
command_name="${PATH_SEGMENTS[2]}"

# Ensure the project code and command name are safe to use in a shell command
if [[ ! $project_code =~ ^[a-z0-9][a-z0-9-]*$ ]] || [[ ! $command_name =~ ^[a-zA-Z0-9]+$ ]]; then
if [[ ! "$project_code" =~ ^[a-z0-9][a-z0-9-]*$ ]] || [[ ! "$command_name" =~ ^[a-zA-Z0-9]+$ ]]; then
echo "Content-type: text/plain"
echo "Status: 400 Bad Request"
echo ""
Expand Down Expand Up @@ -38,8 +38,23 @@ if [[ $command_name == "healthz" ]]; then
exit 0
fi

if [[ $command_name == "ldmlzip" ]]; then
    # Preflight check: ldml zip access is only allowed if LexiconSettings.plsx contains addToSldr="true"
    # project_code has already been validated above, so taking its first character is safe.
    first_char="${project_code:0:1}"
    if ! chg --cwd "/var/hg/repos/$first_char/$project_code" cat -r tip CachedSettings/SharedSettings/LexiconSettings.plsx | grep '<WritingSystems' | grep 'addToSldr="true"' >/dev/null; then
        # Either the settings file could not be read or the flag is absent: refuse and stop.
        printf '%s\n' \
            "Content-type: text/plain" \
            "Status: 403 Forbidden" \
            "" \
            "Forbidden. Project does not allow sharing writing systems with SLDR or project does not exist"
        exit 1
    fi
    # Sharing is allowed; the response body will be a zip archive.
    CONTENT_TYPE="application/zip"
fi

CONTENT_TYPE="${CONTENT_TYPE:-text/plain}"
# Start outputting the result right away so the HTTP connection won't be timed out
echo "Content-type: text/plain"
echo "Content-type: ${CONTENT_TYPE}"
echo ""

# Run the hg command, simply output to stdout
Expand Down Expand Up @@ -90,6 +105,11 @@ case $command_name in
du -ks .hg | cut -f1
;;

ldmlzip)
# -p '.' so that resulting zipfiles will *not* have the project name in the file paths
chg archive -p '.' -t zip -r tip -I 'CachedSettings/WritingSystemStore/*.ldml' -
;;

verify)
# Env var PYTHONUNBUFFERED required for commands like verify and recover, so that output can stream back to the project page
export PYTHONUNBUFFERED=1
Expand Down
Loading