Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drive health #156

Merged
merged 8 commits into from
Jan 16, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM mcr.microsoft.com/dotnet/sdk:6.0-jammy AS build-env
FROM mcr.microsoft.com/dotnet/sdk:8.0-jammy AS build-env
WORKDIR /app

RUN apt-get update && apt-get install -y g++ curl cmake
Expand All @@ -12,7 +12,7 @@ RUN dotnet publish ./src/SIL.Machine.Serval.EngineServer/SIL.Machine.Serval.Engi
RUN dotnet publish ./src/SIL.Machine.Serval.JobServer/SIL.Machine.Serval.JobServer.csproj -c Release -o out_job_server

# Build runtime image
FROM mcr.microsoft.com/dotnet/aspnet:6.0-jammy as production
FROM mcr.microsoft.com/dotnet/aspnet:8.0-jammy as production
# libgomp needed for thot
RUN apt-get update && apt-get install -y libgomp1
WORKDIR /app
Expand Down
2 changes: 1 addition & 1 deletion dockerfile.development
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM mcr.microsoft.com/dotnet/sdk:6.0-jammy
FROM mcr.microsoft.com/dotnet/sdk:8.0-jammy
# libgomp needed for thot
RUN apt update && apt install -y unzip libgomp1 && \
curl -sSL https://aka.ms/getvsdbgsh | /bin/sh /dev/stdin -v latest -l /remote_debugger
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,10 @@ public static IMachineBuilder AddUnigramTruecaser(this IMachineBuilder builder)

public static IMachineBuilder AddClearMLService(this IMachineBuilder builder, string? connectionString = null)
{
connectionString ??= builder.Configuration.GetConnectionString("ClearML");
connectionString ??= builder.Configuration!.GetConnectionString("ClearML");
builder.Services
.AddHttpClient("ClearML")
.ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString))
.ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString!))
// Add retry policy; fail after approx. 2 + 4 + 8 = 14 seconds
.AddTransientHttpErrorPolicy(
b => b.WaitAndRetryAsync(3, retryAttempt => TimeSpan.FromSeconds(Math.Pow(2, retryAttempt)))
Expand All @@ -120,8 +120,9 @@ public static IMachineBuilder AddClearMLService(this IMachineBuilder builder, st

builder.Services
.AddHttpClient("ClearML-NoRetry")
.ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString));
.ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString!));
builder.Services.AddSingleton<ClearMLHealthCheck>();

builder.Services.AddHealthChecks().AddCheck<ClearMLHealthCheck>("ClearML Health Check");

return builder;
Expand Down Expand Up @@ -158,7 +159,7 @@ public static IMachineBuilder AddMongoHangfireJobClient(
.UseSimpleAssemblyNameTypeSerializer()
.UseRecommendedSerializerSettings()
.UseMongoStorage(
connectionString ?? builder.Configuration.GetConnectionString("Hangfire"),
connectionString ?? builder.Configuration!.GetConnectionString("Hangfire"),
new MongoStorageOptions
{
MigrationOptions = new MongoMigrationOptions
Expand Down Expand Up @@ -220,9 +221,9 @@ public static IMachineBuilder AddMemoryDataAccess(this IMachineBuilder builder)

public static IMachineBuilder AddMongoDataAccess(this IMachineBuilder builder, string? connectionString = null)
{
connectionString ??= builder.Configuration.GetConnectionString("Mongo");
connectionString ??= builder.Configuration!.GetConnectionString("Mongo");
builder.Services.AddMongoDataAccess(
connectionString,
connectionString!,
"SIL.Machine.AspNetCore.Models",
o =>
{
Expand Down Expand Up @@ -257,7 +258,7 @@ await c.Indexes.CreateOrUpdateAsync(
);
}
);
builder.Services.AddHealthChecks().AddMongoDb(connectionString, name: "Mongo");
builder.Services.AddHealthChecks().AddMongoDb(connectionString!, name: "Mongo");

return builder;
}
Expand All @@ -271,7 +272,7 @@ public static IMachineBuilder AddServalPlatformService(
builder.Services
.AddGrpcClient<TranslationPlatformApi.TranslationPlatformApiClient>(o =>
{
o.Address = new Uri(connectionString ?? builder.Configuration.GetConnectionString("Serval"));
o.Address = new Uri(connectionString ?? builder.Configuration!.GetConnectionString("Serval")!);
})
.ConfigureChannel(o =>
{
Expand Down Expand Up @@ -321,7 +322,7 @@ public static IMachineBuilder AddServalTranslationEngineService(
options.Interceptors.Add<CancellationInterceptor>();
options.Interceptors.Add<UnimplementedInterceptor>();
});
builder.AddServalPlatformService(connectionString ?? builder.Configuration.GetConnectionString("Serval"));
builder.AddServalPlatformService(connectionString ?? builder.Configuration!.GetConnectionString("Serval"));
engineTypes ??=
builder.Configuration?.GetSection("TranslationEngines").Get<TranslationEngineType[]?>()
?? new[] { TranslationEngineType.SmtTransfer, TranslationEngineType.Nmt };
Expand All @@ -340,7 +341,6 @@ public static IMachineBuilder AddServalTranslationEngineService(
break;
}
}
builder.Services.AddGrpcHealthChecks();

return builder;
}
Expand All @@ -359,7 +359,7 @@ Action<BuildJobOptions> configureOptions
/// <summary>
/// Registers <see cref="BuildJobOptions"/> against the supplied configuration, binds an options
/// instance from that same configuration, and hands it to another <c>AddBuildJobService</c> overload.
/// </summary>
/// <param name="builder">The machine builder whose service collection is being configured.</param>
/// <param name="config">The configuration that <see cref="BuildJobOptions"/> is bound from.</param>
/// <returns>Whatever the delegated <c>AddBuildJobService</c> overload returns.</returns>
public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder, IConfiguration config)
{
builder.Services.Configure<BuildJobOptions>(config);
// NOTE(review): the next two lines look like the removed and the added side of a one-line diff
// (hunk -359,7 +359,7); presumably only the second one, with the null-forgiving operator, is in
// the resulting file — confirm against the merged source.
var options = config.Get<BuildJobOptions>();
var options = config.Get<BuildJobOptions>()!;
return builder.AddBuildJobService(options);
}

Expand All @@ -368,7 +368,24 @@ public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder)
if (builder.Configuration is null)
builder.AddBuildJobService(o => { });
else
{
builder.AddBuildJobService(builder.Configuration.GetSection(BuildJobOptions.Key));

string EnginesDir = builder.Configuration
.GetSection(SmtTransferEngineOptions.Key)!
.GetValue<string>("EnginesDir")!;

string driveLetter = Path.GetPathRoot(EnginesDir)![..1];
// add health check for disk storage capacity
builder.Services
.AddHealthChecks()
.AddDiskStorageHealthCheck(
x => x.AddDrive(driveLetter, 1_000), // 1GB
"SMT Engine Storage Capacity",
HealthStatus.Degraded
);
}

return builder;
}

Expand Down
3 changes: 2 additions & 1 deletion src/SIL.Machine.AspNetCore/SIL.Machine.AspNetCore.csproj
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<Description>An ASP.NET Core web API middleware for the Machine library.</Description>
<NoWarn>1591</NoWarn>
<ImplicitUsings>enable</ImplicitUsings>
Expand All @@ -26,6 +26,7 @@

<ItemGroup>
<PackageReference Include="AspNetCore.HealthChecks.MongoDb" Version="6.0.2" />
<PackageReference Include="AspNetCore.HealthChecks.System" Version="6.0.2" />
<PackageReference Include="AWSSDK.S3" Version="3.7.205.8" />
<PackageReference Include="Grpc.AspNetCore" Version="2.57.0" />
<PackageReference Include="Grpc.AspNetCore.HealthChecks" Version="2.57.0" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ public class ClearMLAuthenticationService : RecurrentTask, IClearMLAuthenticatio
// technically, the token should be good for 30 days, but let's refresh each hour
// to know well ahead of time if something is wrong.
private static readonly TimeSpan RefreshPeriod = TimeSpan.FromSeconds(3600);
private string _authToken = "";
private string? _authToken = "";

public ClearMLAuthenticationService(
IServiceProvider services,
Expand All @@ -29,14 +29,14 @@ public async Task<string> GetAuthTokenAsync(CancellationToken cancellationToken
{
using (await _lock.LockAsync(cancellationToken))
{
if (_authToken is "")
if (_authToken is null || _authToken is "")
{
// Should only happen once, so the cost is no different from the previous solution
_logger.LogInformation("Token was empty; refreshing");
await AuthorizeAsync(cancellationToken);
}
}
return _authToken;
return _authToken ?? throw new Exception("ClearML authentication token not found in response.");
}

protected override async Task DoWorkAsync(IServiceScope scope, CancellationToken cancellationToken)
Expand All @@ -49,6 +49,9 @@ protected override async Task DoWorkAsync(IServiceScope scope, CancellationToken
catch (Exception e)
{
_logger.LogError(e, "Error occurred while refreshing ClearML authentication token.");
if (_authToken is null || _authToken is "")
// The ClearML token was never set. We can't continue without it.
throw;
}
}

Expand All @@ -64,6 +67,8 @@ private async Task AuthorizeAsync(CancellationToken cancellationToken)
HttpResponseMessage response = await _httpClient.SendAsync(request, cancellationToken);
string result = await response.Content.ReadAsStringAsync(cancellationToken);
_authToken = (string)((JsonObject?)JsonNode.Parse(result))?["data"]?["token"]!;
if (_authToken is null || _authToken is "")
throw new Exception($"ClearML authentication failed - {response.StatusCode}: {response.ReasonPhrase}");
_logger.LogInformation("ClearML Authentication Token Refresh Successful.");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,15 @@

private readonly Dictionary<TranslationEngineType, ITranslationEngineService> _engineServices;

public ServalTranslationEngineServiceV1(IEnumerable<ITranslationEngineService> engineServices)
private readonly HealthCheckService _healthCheckService;

/// <summary>
/// Creates the service, keeping a reference to the health check service and indexing the
/// supplied engine services by their <c>Type</c>.
/// </summary>
/// <param name="engineServices">The engine services to index; each is keyed by its <c>Type</c>.</param>
/// <param name="healthCheckService">The health check service stored for later use.</param>
public ServalTranslationEngineServiceV1(
    IEnumerable<ITranslationEngineService> engineServices,
    HealthCheckService healthCheckService
)
{
    _healthCheckService = healthCheckService;

    // One entry per engine type; ToDictionary throws if two services report the same type.
    Dictionary<TranslationEngineType, ITranslationEngineService> servicesByType = engineServices.ToDictionary(
        service => service.Type
    );
    _engineServices = servicesByType;
}

public override async Task<Empty> Create(CreateRequest request, ServerCallContext context)
Expand Down Expand Up @@ -127,6 +133,13 @@
return new GetQueueSizeResponse { Size = await engineService.GetQueueSizeAsync(context.CancellationToken) };
}

/// <summary>
/// Health endpoint of the gRPC service: awaits <c>CheckHealthAsync</c> on the injected
/// <see cref="HealthCheckService"/> and converts the resulting <c>HealthReport</c> into a
/// <c>HealthCheckResponse</c> via <c>WriteGrpcHealthCheckResponse.Generate</c>.
/// </summary>
/// <param name="request">The empty request message; not read by this method.</param>
/// <param name="context">The gRPC call context; not read by this method.</param>
/// <returns>The response generated from the health report.</returns>
// NOTE(review): the build annotations interleaved between the signature and the body report that
// HealthCheckResponse does not resolve and that there is no base method to override at this commit.
public override async Task<HealthCheckResponse> HealthCheck(Empty request, ServerCallContext context)

Check failure on line 136 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

The type or namespace name 'HealthCheckResponse' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 136 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

'ServalTranslationEngineServiceV1.HealthCheck(Empty, ServerCallContext)': no suitable method found to override

Check failure on line 136 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

The type or namespace name 'HealthCheckResponse' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 136 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

'ServalTranslationEngineServiceV1.HealthCheck(Empty, ServerCallContext)': no suitable method found to override

Check failure on line 136 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

The type or namespace name 'HealthCheckResponse' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 136 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

'ServalTranslationEngineServiceV1.HealthCheck(Empty, ServerCallContext)': no suitable method found to override

Check failure on line 136 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

The type or namespace name 'HealthCheckResponse' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 136 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

'ServalTranslationEngineServiceV1.HealthCheck(Empty, ServerCallContext)': no suitable method found to override
{
// NOTE(review): no cancellation token is passed here, whereas GetQueueSize forwards
// context.CancellationToken — consider doing the same so an aborted call stops the checks.
HealthReport healthReport = await _healthCheckService.CheckHealthAsync();
// Translate the ASP.NET Core health report into the gRPC response message.
HealthCheckResponse healthCheckResponse = WriteGrpcHealthCheckResponse.Generate(healthReport);
return healthCheckResponse;
}

private ITranslationEngineService GetEngineService(string engineTypeStr)
{
if (_engineServices.TryGetValue(GetEngineType(engineTypeStr), out ITranslationEngineService? service))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<RootNamespace>SIL.Machine.Morphology.HermitCrab</RootNamespace>
<PackAsTool>true</PackAsTool>
<ToolCommandName>hc</ToolCommandName>
Expand Down
2 changes: 1 addition & 1 deletion src/SIL.Machine.Plugin/SIL.Machine.Plugin.csproj
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<Description>A plugin framework for the Machine library.</Description>
</PropertyGroup>

Expand Down
1 change: 0 additions & 1 deletion src/SIL.Machine.Serval.EngineServer/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
app.UseHttpsRedirection();

app.MapServalTranslationEngineService();
app.MapGrpcHealthChecksService();
app.MapHangfireDashboard();

app.Run();
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk.Web">

<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<UserSecretsId>34e222a9-ef76-48f9-869e-338547f9bd25</UserSecretsId>
Expand All @@ -23,7 +23,7 @@

<!-- Include icu.net.dll.config - which is only available after the package is built -->
<ItemGroup>
<ResolvedFileToPublish Include=".\bin\Release\net6.0\icu.net.dll.config">
<ResolvedFileToPublish Include=".\bin\Release\net8.0\icu.net.dll.config">
<RelativePath>icu.net.dll.config</RelativePath>
</ResolvedFileToPublish>
</ItemGroup>
Expand Down
2 changes: 0 additions & 2 deletions src/SIL.Machine.Serval.JobServer/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,4 @@

var app = builder.Build();

app.MapHealthChecks("/health");

app.Run();
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk.Web">

<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<UserSecretsId>aa9e7440-5a04-4de6-ba51-bab9ef4a62e1</UserSecretsId>
Expand All @@ -25,7 +25,7 @@

<!-- Include icu.net.dll.config - which is only available after the package is built -->
<ItemGroup>
<ResolvedFileToPublish Include=".\bin\Release\net6.0\icu.net.dll.config">
<ResolvedFileToPublish Include=".\bin\Release\net8.0\icu.net.dll.config">
<RelativePath>icu.net.dll.config</RelativePath>
</ResolvedFileToPublish>
</ItemGroup>
Expand Down
2 changes: 1 addition & 1 deletion src/SIL.Machine.Tool/SIL.Machine.Tool.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<RootNamespace>SIL.Machine</RootNamespace>
<PackAsTool>true</PackAsTool>
<ToolCommandName>machine</ToolCommandName>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<RootNamespace>SIL.Machine.AspNetCore</RootNamespace>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<RootNamespace>SIL.Machine.Morphology.HermitCrab</RootNamespace>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
Expand Down
2 changes: 1 addition & 1 deletion tests/SIL.Machine.Tests/SIL.Machine.Tests.csproj
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<RootNamespace>SIL.Machine</RootNamespace>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<IsPackable>false</IsPackable>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<RootNamespace>SIL.Machine.Translation.Thot</RootNamespace>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
Expand Down
Loading