feat: Add LlamafileClient
kaspermarstal committed Sep 19, 2024
1 parent 110589d commit e49dd72
Showing 8 changed files with 363 additions and 1 deletion.
2 changes: 2 additions & 0 deletions src/Cellm/Models/ClientFactory.cs
@@ -1,5 +1,6 @@
using Cellm.Models.Anthropic;
using Cellm.Models.Google;
using Cellm.Models.Llamafile;
using Cellm.Models.OpenAi;
using Cellm.Services;

@@ -15,6 +16,7 @@ public IClient GetClient(string modelProvider)
"anthropic" => ServiceLocator.Get<AnthropicClient>(),
"google" => ServiceLocator.Get<GoogleClient>(),
"openai" => ServiceLocator.Get<OpenAiClient>(),
"llamafile" => ServiceLocator.Get<LlamafileClient>(),
_ => throw new ArgumentException($"Unsupported client type: {modelProvider}")
};
}
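For context, a hypothetical call site (not part of this commit) would resolve the new provider like any existing one; the literal "llamafile" key must match the case added above, the model name is simply the default from the appsettings file further down, and the Prompt is assumed to be built elsewhere:

// Hypothetical call site: resolve the Llamafile-backed client by provider name
// and send a prompt through it.
var clientFactory = ServiceLocator.Get<IClientFactory>();
var client = clientFactory.GetClient("llamafile");
var reply = await client.Send(prompt, "llamafile", "qwen-2.5-3b-instruct-q6-k-l");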
47 changes: 47 additions & 0 deletions src/Cellm/Models/Llamafile/AsyncLazy.cs
@@ -0,0 +1,47 @@
using System.Runtime.CompilerServices;

/// <summary>
/// Provides thread-safe asynchronous lazy initialization. This type is fully thread-safe.
/// </summary>
/// <typeparam name="T">The type of object that is being asynchronously initialized.</typeparam>
public sealed class AsyncLazy<T>
{
/// <summary>
/// The underlying lazy task.
/// </summary>
private readonly Lazy<Task<T>> instance;

/// <summary>
/// Initializes a new instance of the <see cref="AsyncLazy{T}"/> class.
/// </summary>
/// <param name="factory">The delegate that is invoked on a background thread to produce the value when it is needed.</param>
public AsyncLazy(Func<T> factory)
{
instance = new Lazy<Task<T>>(() => Task.Run(factory));
}

/// <summary>
/// Initializes a new instance of the <see cref="AsyncLazy{T}"/> class.
/// </summary>
/// <param name="factory">The asynchronous delegate that is invoked on a background thread to produce the value when it is needed.</param>
public AsyncLazy(Func<Task<T>> factory)
{
instance = new Lazy<Task<T>>(() => Task.Run(factory));
}

/// <summary>
/// Asynchronous infrastructure support. This method permits instances of <see cref="AsyncLazy{T}"/> to be awaited.
/// </summary>
public TaskAwaiter<T> GetAwaiter()
{
return instance.Value.GetAwaiter();
}

/// <summary>
/// Starts the asynchronous initialization, if it has not already started.
/// </summary>
public void Start()
{
_ = instance.Value;
}
}
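A short, hypothetical usage sketch (not part of the commit): the factory delegate runs at most once, on the first await or on Start(), and every later await returns the cached result.

// Hypothetical example of AsyncLazy<T>; the delay is a stand-in for real work.
var path = new AsyncLazy<string>(async () =>
{
    await Task.Delay(1000); // simulate a slow download
    return "Llamafile.exe";
});

path.Start();            // optionally kick off initialization early
var first = await path;  // awaits the underlying task
var second = await path; // completes immediately with the cached value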
90 changes: 90 additions & 0 deletions src/Cellm/Models/Llamafile/LLamafileProcessManager.cs
@@ -0,0 +1,90 @@
using System.Diagnostics;
using System.Runtime.InteropServices;

public class LLamafileProcessManager
{
[DllImport("kernel32.dll", CharSet = CharSet.Unicode)]
static extern IntPtr CreateJobObject(IntPtr a, string lpName);

[DllImport("kernel32.dll")]
static extern bool AssignProcessToJobObject(IntPtr job, IntPtr process);

[DllImport("kernel32.dll")]
static extern bool SetInformationJobObject(IntPtr hJob, JobObjectInfoType infoType, IntPtr lpJobObjectInfo, uint cbJobObjectInfoLength);

[StructLayout(LayoutKind.Sequential)]
struct JOBOBJECT_BASIC_LIMIT_INFORMATION
{
public Int64 PerProcessUserTimeLimit;
public Int64 PerJobUserTimeLimit;
public UInt32 LimitFlags;
public UIntPtr MinimumWorkingSetSize;
public UIntPtr MaximumWorkingSetSize;
public UInt32 ActiveProcessLimit;
public UIntPtr Affinity;
public UInt32 PriorityClass;
public UInt32 SchedulingClass;
}

[StructLayout(LayoutKind.Sequential)]
struct JOBOBJECT_EXTENDED_LIMIT_INFORMATION
{
public JOBOBJECT_BASIC_LIMIT_INFORMATION BasicLimitInformation;
public IO_COUNTERS IoInfo;
public UIntPtr ProcessMemoryLimit;
public UIntPtr JobMemoryLimit;
public UIntPtr PeakProcessMemoryUsed;
public UIntPtr PeakJobMemoryUsed;
}

[StructLayout(LayoutKind.Sequential)]
struct IO_COUNTERS
{
public UInt64 ReadOperationCount;
public UInt64 WriteOperationCount;
public UInt64 OtherOperationCount;
public UInt64 ReadTransferCount;
public UInt64 WriteTransferCount;
public UInt64 OtherTransferCount;
}

enum JobObjectInfoType
{
AssociateCompletionPortInformation = 7,
BasicLimitInformation = 2,
BasicUIRestrictions = 4,
EndOfJobTimeInformation = 6,
ExtendedLimitInformation = 9,
SecurityLimitInformation = 5,
GroupInformation = 11
}

private IntPtr _jobObject;

public LLamafileProcessManager()
{
_jobObject = CreateJobObject(IntPtr.Zero, string.Empty);

var info = new JOBOBJECT_BASIC_LIMIT_INFORMATION
{
LimitFlags = 0x2000
};

var extendedInfo = new JOBOBJECT_EXTENDED_LIMIT_INFORMATION
{
BasicLimitInformation = info
};

int length = Marshal.SizeOf(typeof(JOBOBJECT_EXTENDED_LIMIT_INFORMATION));
IntPtr extendedInfoPtr = Marshal.AllocHGlobal(length);
Marshal.StructureToPtr(extendedInfo, extendedInfoPtr, false);

SetInformationJobObject(_jobObject, JobObjectInfoType.ExtendedLimitInformation, extendedInfoPtr, (uint)length);
Marshal.FreeHGlobal(extendedInfoPtr);
}

public void AssignProcessToCellm(Process process)
{
AssignProcessToJobObject(_jobObject, process.Handle);
}
}
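The LimitFlags value 0x2000 is JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE, so every process assigned to the job is terminated by Windows when the last handle to the job is closed, i.e. at the latest when the host (Excel/Cellm) process exits. This keeps llamafile servers from being orphaned. A hypothetical usage sketch, separate from this commit:

// Hypothetical usage: tie a child process's lifetime to the current process.
var processManager = new LLamafileProcessManager();
var child = Process.Start(new ProcessStartInfo("Llamafile.exe"))
    ?? throw new InvalidOperationException("Failed to start process");

// From this point on, Windows kills the child when the job handle is closed,
// even if the parent exits without running any cleanup code.
processManager.AssignProcessToCellm(child);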
159 changes: 159 additions & 0 deletions src/Cellm/Models/Llamafile/LlamafileClient.cs
@@ -0,0 +1,159 @@
using System.Diagnostics;
using Cellm.AddIn.Exceptions;
using Cellm.AddIn.Prompts;
using Cellm.AddIn;
using Cellm.Models.OpenAi;
using Microsoft.Extensions.Options;

namespace Cellm.Models.Llamafile;

internal class LlamafileClient : IClient
{
private readonly AsyncLazy<string> _llamafilePath;
private readonly AsyncLazy<string> _llamafileModelPath;
private readonly AsyncLazy<Process> _llamafileProcess;

private readonly CellmConfiguration _cellmConfiguration;
private readonly LlamafileConfiguration _llamafileConfiguration;
private readonly OpenAiConfiguration _openAiConfiguration;

private readonly IClient _openAiClient;
private readonly HttpClient _httpClient;
private readonly LLamafileProcessManager _llamafileProcessManager;

public LlamafileClient(
IOptions<CellmConfiguration> cellmConfiguration,
IOptions<LlamafileConfiguration> llamafileConfiguration,
IOptions<OpenAiConfiguration> openAiConfiguration,
IClientFactory clientFactory,
HttpClient httpClient,
LLamafileProcessManager llamafileProcessManager)
{
_cellmConfiguration = cellmConfiguration.Value;
_llamafileConfiguration = llamafileConfiguration.Value;
_openAiConfiguration = openAiConfiguration.Value;
_openAiClient = clientFactory.GetClient("openai");
_httpClient = httpClient;
_llamafileProcessManager = llamafileProcessManager;

_llamafilePath = new AsyncLazy<string>(async () =>
{
return await DownloadFile(_llamafileConfiguration.LlamafileUrl, $"Llamafile.exe", httpClient);
});

_llamafileModelPath = new AsyncLazy<string>(async () =>
{
return await DownloadFile(_llamafileConfiguration.Models[_llamafileConfiguration.DefaultModel], $"Llamafile-{_llamafileConfiguration.DefaultModel}", httpClient);
});

_llamafileProcess = new AsyncLazy<Process>(async () =>
{
return await StartProcess();
});
}

public async Task<Prompt> Send(Prompt prompt, string? provider, string? model)
{
await _llamafilePath;
await _llamafileModelPath;
await _llamafileProcess;
return await _openAiClient.Send(prompt, provider, model);
}

private async Task<Process> StartProcess()
{
var processStartInfo = new ProcessStartInfo(await _llamafilePath);
processStartInfo.Arguments += $"-m {await _llamafileModelPath} ";
processStartInfo.Arguments += $"--port {_llamafileConfiguration.Port} ";

if (!_cellmConfiguration.Debug)
{
processStartInfo.Arguments += "--disable-browser ";
}

if (_llamafileConfiguration.Gpu)
{
processStartInfo.Arguments += $"-ngl {_llamafileConfiguration.GpuLayers} ";
}

var process = Process.Start(processStartInfo) ?? throw new CellmException("Failed to start Llamafile server");

try
{
await WaitForLlamafile(process);
_llamafileProcessManager.AssignProcessToCellm(process);
return process;
}
catch
{
process.Kill();
throw;
}
}

private static async Task<string> DownloadFile(Uri uri, string filename, HttpClient httpClient)
{
var filePath = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), nameof(Cellm), filename);
Directory.CreateDirectory(Path.GetDirectoryName(filePath) ?? throw new CellmException("Failed to create Llamafile path"));

if (File.Exists(filePath))
{
return filePath;
}

var filePathPart = filePath + ".part";

if (File.Exists(filePathPart))
{
File.Delete(filePathPart);
}

var response = await httpClient.GetAsync(uri, HttpCompletionOption.ResponseHeadersRead);
response.EnsureSuccessStatusCode();

using (var fileStream = File.Create(filePathPart))
using (var httpStream = await response.Content.ReadAsStreamAsync())
{
await httpStream.CopyToAsync(fileStream).ConfigureAwait(false);
}

File.Move(filePathPart, filePath);

return filePath;
}

private async Task WaitForLlamafile(Process process)
{
var startTime = DateTime.UtcNow;

while ((DateTime.UtcNow - startTime).TotalSeconds < 30) // Max 30 seconds timeout
{
if (process.HasExited)
{
throw new CellmException($"Failed to run Llamafile. Exit code: {process.ExitCode}");
}

// Use a fresh 1-second timeout per attempt; a single token created outside the loop
// would already be cancelled after the first second and block every later health check
using var cancellationTokenSource = new CancellationTokenSource(TimeSpan.FromSeconds(1));

try
{
var response = await _httpClient.GetAsync($"{_openAiConfiguration.BaseAddress}/health", cancellationTokenSource.Token);
if (response.StatusCode == System.Net.HttpStatusCode.OK)
{
return; // Server is healthy
}
}
catch (TaskCanceledException)
{
// Health check timed out; server not ready yet
}
catch (HttpRequestException)
{
// Server not accepting connections yet
}

await Task.Delay(500); // Wait for 500ms before next attempt
}

throw new CellmException("Timeout waiting for Llamafile server to be ready");
}
}
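In short, Send lazily downloads the llamafile executable and the model on first use, starts the server, polls its /health endpoint, and then delegates the actual completion to the existing OpenAI client; this works because llamafile exposes an OpenAI-compatible API at the BaseAddress configured below. A hypothetical end-to-end sketch, assuming a Prompt built elsewhere (whether null provider/model fall back to configured defaults depends on the OpenAI client, not shown here):

// Hypothetical end-to-end use via the DI container configured in ServiceLocator.
var llamafile = ServiceLocator.Get<LlamafileClient>();

// First call: downloads Llamafile.exe and the model if missing, starts the
// server, waits for /health, then forwards the prompt to the local
// OpenAI-compatible endpoint. Later calls reuse all three lazy values.
var reply = await llamafile.Send(prompt, null, null);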

25 changes: 25 additions & 0 deletions src/Cellm/Models/Llamafile/LlamafileConfiguration.cs
@@ -0,0 +1,25 @@
namespace Cellm.Models.Llamafile;

internal class LlamafileConfiguration
{
public Uri LlamafileUrl { get; init; }

public Dictionary<string, Uri> Models { get; init; }

public string DefaultModel { get; init; }

public ushort Port { get; init; }

public bool Gpu { get; init; }

public int GpuLayers { get; init; }

public LlamafileConfiguration()
{
LlamafileUrl = default!;
Models = default!;
DefaultModel = default!;
Gpu = false;
GpuLayers = 999;
}
}
7 changes: 6 additions & 1 deletion src/Cellm/Services/ServiceLocator.cs
@@ -3,6 +3,7 @@
using Cellm.Models;
using Cellm.Models.Anthropic;
using Cellm.Models.Google;
using Cellm.Models.Llamafile;
using Cellm.Models.OpenAi;
using Cellm.Services.Configuration;
using ExcelDna.Integration;
@@ -42,6 +43,7 @@ private static IServiceCollection ConfigureServices(IServiceCollection services)
.Configure<AnthropicConfiguration>(configuration.GetRequiredSection(nameof(AnthropicConfiguration)))
.Configure<GoogleConfiguration>(configuration.GetRequiredSection(nameof(GoogleConfiguration)))
.Configure<OpenAiConfiguration>(configuration.GetRequiredSection(nameof(OpenAiConfiguration)))
.Configure<LlamafileConfiguration>(configuration.GetRequiredSection(nameof(LlamafileConfiguration)))
.Configure<RateLimiterConfiguration>(configuration.GetRequiredSection(nameof(RateLimiterConfiguration)))
.Configure<CircuitBreakerConfiguration>(configuration.GetRequiredSection(nameof(CircuitBreakerConfiguration)))
.Configure<RetryConfiguration>(configuration.GetRequiredSection(nameof(RetryConfiguration)))
@@ -83,7 +85,8 @@ private static IServiceCollection ConfigureServices(IServiceCollection services)
.AddSingleton<IClientFactory, ClientFactory>()
.AddSingleton<IClient, Client>()
.AddSingleton<ICache, Cache>()
.AddSingleton<ISerde, Serde>();
.AddSingleton<ISerde, Serde>()
.AddSingleton<LLamafileProcessManager>();

// Model Providers
var rateLimiterConfiguration = configuration.GetRequiredSection(nameof(RateLimiterConfiguration)).Get<RateLimiterConfiguration>()
@@ -125,6 +128,8 @@ private static IServiceCollection ConfigureServices(IServiceCollection services)
openAiHttpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {openAiConfiguration.ApiKey}");
}).AddResilienceHandler("OpenAiResiliencePipeline", resiliencePipelineConfigurator.ConfigureResiliencePipeline);

services.AddSingleton<LlamafileClient>();

return services;
}
}
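For reference, the Configure<LlamafileConfiguration>(...) call added above binds the "LlamafileConfiguration" section of the appsettings file shown below, which is what allows LlamafileClient to take IOptions<LlamafileConfiguration> in its constructor. A rough, hypothetical equivalent using the same Get<T> binder extension already used for RateLimiterConfiguration above:

// Hypothetical manual binding, equivalent in effect to the Configure<T> call.
var llamafileConfiguration = configuration
    .GetRequiredSection(nameof(LlamafileConfiguration))
    .Get<LlamafileConfiguration>()
    ?? throw new NullReferenceException(nameof(LlamafileConfiguration));

Console.WriteLine(llamafileConfiguration.DefaultModel); // "qwen-2.5-3b-instruct-q6-k-l"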
18 changes: 18 additions & 0 deletions src/Cellm/appsettings.Local.Llamafile.GPU.json
@@ -0,0 +1,18 @@
{
"LlamafileConfiguration": {
"LlamafileUrl": "https://github.com/Mozilla-Ocho/llamafile/releases/download/0.8.13/llamafile-0.8.13",
"DefaultModel": "qwen-2.5-3b-instruct-q6-k-l",
"Models": {
"qwen-2.5-3b-instruct-q6-k-l": "https://huggingface.co/bartowski/Qwen2.5-3B-Instruct-GGUF/resolve/main/Qwen2.5-3B-Instruct-Q6_K_L.gguf"
},
"Port": 22195,
"GPU": true,
"GpuLayers": 999
},
"OpenAiConfiguration": {
"BaseAddress": "http://localhost:22195"
},
"CellmConfiguration": {
"DefaultModelProvider": "Llamafile"
}
}