diff --git a/src/Cellm/Models/ClientFactory.cs b/src/Cellm/Models/ClientFactory.cs
index 9845a4f..c9ab4e0 100644
--- a/src/Cellm/Models/ClientFactory.cs
+++ b/src/Cellm/Models/ClientFactory.cs
@@ -1,5 +1,6 @@
 using Cellm.Models.Anthropic;
 using Cellm.Models.Google;
+using Cellm.Models.Llamafile;
 using Cellm.Models.OpenAi;
 using Cellm.Services;
 
@@ -15,6 +16,7 @@ public IClient GetClient(string modelProvider)
         "anthropic" => ServiceLocator.Get<AnthropicClient>(),
         "google" => ServiceLocator.Get<GoogleClient>(),
         "openai" => ServiceLocator.Get<OpenAiClient>(),
+        "llamafile" => ServiceLocator.Get<LlamafileClient>(),
         _ => throw new ArgumentException($"Unsupported client type: {modelProvider}")
     };
 }
diff --git a/src/Cellm/Models/Llamafile/AsyncLazy.cs b/src/Cellm/Models/Llamafile/AsyncLazy.cs
new file mode 100644
index 0000000..9267bc9
--- /dev/null
+++ b/src/Cellm/Models/Llamafile/AsyncLazy.cs
@@ -0,0 +1,47 @@
+using System.Runtime.CompilerServices;
+
+/// <summary>
+/// Provides threadsafe asynchronous lazy initialization. This type is fully threadsafe.
+/// </summary>
+/// <typeparam name="T">The type of object that is being asynchronously initialized.</typeparam>
+public sealed class AsyncLazy<T>
+{
+    /// <summary>
+    /// The underlying lazy task.
+    /// </summary>
+    private readonly Lazy<Task<T>> instance;
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="AsyncLazy{T}"/> class.
+    /// </summary>
+    /// <param name="factory">The delegate that is invoked on a background thread to produce the value when it is needed.</param>
+    public AsyncLazy(Func<T> factory)
+    {
+        instance = new Lazy<Task<T>>(() => Task.Run(factory));
+    }
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="AsyncLazy{T}"/> class.
+    /// </summary>
+    /// <param name="factory">The asynchronous delegate that is invoked on a background thread to produce the value when it is needed.</param>
+    public AsyncLazy(Func<Task<T>> factory)
+    {
+        instance = new Lazy<Task<T>>(() => Task.Run(factory));
+    }
+
+    /// <summary>
+    /// Asynchronous infrastructure support. This method permits instances of <see cref="AsyncLazy{T}"/> to be awaited.
+    /// </summary>
+    public TaskAwaiter<T> GetAwaiter()
+    {
+        return instance.Value.GetAwaiter();
+    }
+
+    /// <summary>
+    /// Starts the asynchronous initialization, if it has not already started.
+    /// </summary>
+    public void Start()
+    {
+        _ = instance.Value;
+    }
+}
\ No newline at end of file
diff --git a/src/Cellm/Models/Llamafile/LLamafileProcessManager.cs b/src/Cellm/Models/Llamafile/LLamafileProcessManager.cs
new file mode 100644
index 0000000..a4f2dd7
--- /dev/null
+++ b/src/Cellm/Models/Llamafile/LLamafileProcessManager.cs
@@ -0,0 +1,90 @@
+using System.Diagnostics;
+using System.Runtime.InteropServices;
+
+public class LLamafileProcessManager
+{
+    [DllImport("kernel32.dll", CharSet = CharSet.Unicode)]
+    static extern IntPtr CreateJobObject(IntPtr a, string lpName);
+
+    [DllImport("kernel32.dll")]
+    static extern bool AssignProcessToJobObject(IntPtr job, IntPtr process);
+
+    [DllImport("kernel32.dll")]
+    static extern bool SetInformationJobObject(IntPtr hJob, JobObjectInfoType infoType, IntPtr lpJobObjectInfo, uint cbJobObjectInfoLength);
+
+    [StructLayout(LayoutKind.Sequential)]
+    struct JOBOBJECT_BASIC_LIMIT_INFORMATION
+    {
+        public Int64 PerProcessUserTimeLimit;
+        public Int64 PerJobUserTimeLimit;
+        public UInt32 LimitFlags;
+        public UIntPtr MinimumWorkingSetSize;
+        public UIntPtr MaximumWorkingSetSize;
+        public UInt32 ActiveProcessLimit;
+        public UIntPtr Affinity;
+        public UInt32 PriorityClass;
+        public UInt32 SchedulingClass;
+    }
+
+    [StructLayout(LayoutKind.Sequential)]
+    struct JOBOBJECT_EXTENDED_LIMIT_INFORMATION
+    {
+        public JOBOBJECT_BASIC_LIMIT_INFORMATION BasicLimitInformation;
+        public IO_COUNTERS IoInfo;
+        public UIntPtr ProcessMemoryLimit;
+        public UIntPtr JobMemoryLimit;
+        public UIntPtr PeakProcessMemoryUsed;
+        public UIntPtr PeakJobMemoryUsed;
+    }
+
+    [StructLayout(LayoutKind.Sequential)]
+    struct IO_COUNTERS
+    {
+        public UInt64 ReadOperationCount;
+        public UInt64 WriteOperationCount;
+        public UInt64 OtherOperationCount;
+        public UInt64 ReadTransferCount;
+        public UInt64 WriteTransferCount;
+        public UInt64 OtherTransferCount;
+    }
+
+    enum JobObjectInfoType
+    {
+        AssociateCompletionPortInformation = 7,
+        BasicLimitInformation = 2,
+        BasicUIRestrictions = 4,
+        EndOfJobTimeInformation = 6,
+        ExtendedLimitInformation = 9,
+        SecurityLimitInformation = 5,
+        GroupInformation = 11
+    }
+
+    private IntPtr _jobObject;
+
+    public LLamafileProcessManager()
+    {
+        _jobObject = CreateJobObject(IntPtr.Zero, string.Empty);
+
+        var info = new JOBOBJECT_BASIC_LIMIT_INFORMATION
+        {
+            LimitFlags = 0x2000 // JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE: kill assigned processes when the job handle closes, i.e. when Cellm exits
+        };
+
+        var extendedInfo = new JOBOBJECT_EXTENDED_LIMIT_INFORMATION
+        {
+            BasicLimitInformation = info
+        };
+
+        int length = Marshal.SizeOf(typeof(JOBOBJECT_EXTENDED_LIMIT_INFORMATION));
+        IntPtr extendedInfoPtr = Marshal.AllocHGlobal(length);
+        Marshal.StructureToPtr(extendedInfo, extendedInfoPtr, false);
+
+        SetInformationJobObject(_jobObject, JobObjectInfoType.ExtendedLimitInformation, extendedInfoPtr, (uint)length);
+        Marshal.FreeHGlobal(extendedInfoPtr);
+    }
+
+    public void AssignProcessToCellm(Process process)
+    {
+        AssignProcessToJobObject(_jobObject, process.Handle);
+    }
+}
\ No newline at end of file
diff --git a/src/Cellm/Models/Llamafile/LlamafileClient.cs b/src/Cellm/Models/Llamafile/LlamafileClient.cs
new file mode 100644
index 0000000..3cc5cf8
--- /dev/null
+++ b/src/Cellm/Models/Llamafile/LlamafileClient.cs
@@ -0,0 +1,159 @@
+using System.Diagnostics;
+using Cellm.AddIn.Exceptions;
+using Cellm.AddIn.Prompts;
+using Cellm.AddIn;
+using Cellm.Models.OpenAi;
+using Microsoft.Extensions.Options;
+
+namespace Cellm.Models.Llamafile;
+
+internal class LlamafileClient : IClient
+{
+    private readonly AsyncLazy<string> _llamafilePath;
+    private readonly AsyncLazy<string> _llamafileModelPath;
+    private readonly AsyncLazy<Process> _llamafileProcess;
+
+    private readonly CellmConfiguration _cellmConfiguration;
+    private readonly LlamafileConfiguration _llamafileConfiguration;
+    private readonly OpenAiConfiguration _openAiConfiguration;
+
+    private readonly IClient _openAiClient;
+    private readonly HttpClient _httpClient;
+    private readonly LLamafileProcessManager _llamafileProcessManager;
+
+    public LlamafileClient(
+        IOptions<CellmConfiguration> cellmConfiguration,
+        IOptions<LlamafileConfiguration> llamafileConfiguration,
+        IOptions<OpenAiConfiguration> openAiConfiguration,
+        IClientFactory clientFactory,
+        HttpClient httpClient,
+        LLamafileProcessManager llamafileProcessManager)
+    {
+        _cellmConfiguration = cellmConfiguration.Value;
+        _llamafileConfiguration = llamafileConfiguration.Value;
+        _openAiConfiguration = openAiConfiguration.Value;
+        _openAiClient = clientFactory.GetClient("openai");
+        _httpClient = httpClient;
+        _llamafileProcessManager = llamafileProcessManager;
+
+        _llamafilePath = new AsyncLazy<string>(async () =>
+        {
+            return await DownloadFile(_llamafileConfiguration.LlamafileUrl, $"Llamafile.exe", httpClient);
+        });
+
+        _llamafileModelPath = new AsyncLazy<string>(async () =>
+        {
+            return await DownloadFile(_llamafileConfiguration.Models[_llamafileConfiguration.DefaultModel], $"Llamafile-{_llamafileConfiguration.DefaultModel}", httpClient);
+        });
+
+        _llamafileProcess = new AsyncLazy<Process>(async () =>
+        {
+            return await StartProcess();
+        });
+    }
+
+    public async Task<Prompt> Send(Prompt prompt, string? provider, string? model)
+    {
+        await _llamafilePath;
+        await _llamafileModelPath;
+        await _llamafileProcess;
+        return await _openAiClient.Send(prompt, provider, model);
+    }
+
+    private async Task<Process> StartProcess()
+    {
+        var processStartInfo = new ProcessStartInfo(await _llamafilePath);
+        processStartInfo.Arguments += $"-m {await _llamafileModelPath} ";
+        processStartInfo.Arguments += $"--port {_llamafileConfiguration.Port} ";
+
+        if (!_cellmConfiguration.Debug)
+        {
+            processStartInfo.Arguments += "--disable-browser ";
+        }
+
+        if (_llamafileConfiguration.Gpu)
+        {
+            processStartInfo.Arguments += $"-ngl {_llamafileConfiguration.GpuLayers} ";
+        }
+
+        var process = Process.Start(processStartInfo) ?? throw new CellmException("Failed to start Llamafile server");
+
+        try
+        {
+            await WaitForLlamafile(process);
+            _llamafileProcessManager.AssignProcessToCellm(process);
+            return process;
+        }
+        catch
+        {
+            process.Kill();
+            throw;
+        }
+    }
+
+    private static async Task<string> DownloadFile(Uri uri, string filename, HttpClient httpClient)
+    {
+        var filePath = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), nameof(Cellm), filename);
+        Directory.CreateDirectory(Path.GetDirectoryName(filePath) ??
+            throw new CellmException("Failed to create Llamafile path"));
+
+        if (File.Exists(filePath))
+        {
+            return filePath;
+        }
+
+        var filePathPart = filePath + ".part";
+
+        if (File.Exists(filePathPart))
+        {
+            File.Delete(filePathPart);
+        }
+
+        var response = await httpClient.GetAsync(uri, HttpCompletionOption.ResponseHeadersRead);
+        response.EnsureSuccessStatusCode();
+
+        using (var fileStream = File.Create(filePathPart))
+        using (var httpStream = await response.Content.ReadAsStreamAsync())
+        {
+            await httpStream.CopyToAsync(fileStream).ConfigureAwait(false);
+        }
+
+        File.Move(filePathPart, filePath);
+
+        return filePath;
+    }
+
+    private async Task WaitForLlamafile(Process process)
+    {
+        var startTime = DateTime.UtcNow;
+
+        while ((DateTime.UtcNow - startTime).TotalSeconds < 30) // Max 30 seconds timeout
+        {
+            if (process.HasExited)
+            {
+                throw new CellmException($"Failed to run Llamafile. Exit code: {process.ExitCode}");
+            }
+
+            try
+            {
+                using var cancellationTokenSource = new CancellationTokenSource(TimeSpan.FromSeconds(1)); // Fresh 1-second timeout per attempt; a single shared token would cancel every request after the first second
+                var response = await _httpClient.GetAsync($"{_openAiConfiguration.BaseAddress}/health", cancellationTokenSource.Token);
+                if (response.StatusCode == System.Net.HttpStatusCode.OK)
+                {
+                    return; // Server is healthy
+                }
+            }
+            catch (TaskCanceledException)
+            {
+            }
+            catch (HttpRequestException)
+            {
+            }
+
+            await Task.Delay(500); // Wait for 500ms before next attempt
+        }
+
+        throw new CellmException("Timeout waiting for Llamafile server to be ready");
+    }
+}
+
diff --git a/src/Cellm/Models/Llamafile/LlamafileConfiguration.cs b/src/Cellm/Models/Llamafile/LlamafileConfiguration.cs
new file mode 100644
index 0000000..b8e28f8
--- /dev/null
+++ b/src/Cellm/Models/Llamafile/LlamafileConfiguration.cs
@@ -0,0 +1,25 @@
+namespace Cellm.Models.Llamafile;
+
+internal class LlamafileConfiguration
+{
+    public Uri LlamafileUrl { get; init; }
+
+    public Dictionary<string, Uri> Models { get; init; }
+
+    public string DefaultModel { get; init; }
+
+    public ushort Port { get; init; }
+
+    public bool Gpu { get; init; }
+
+    public int GpuLayers { get; init; }
+
+    public LlamafileConfiguration()
+    {
+        LlamafileUrl = default!;
+        Models = default!;
+        DefaultModel = default!;
+        Gpu = false;
+        GpuLayers = 999;
+    }
+}
diff --git a/src/Cellm/Services/ServiceLocator.cs b/src/Cellm/Services/ServiceLocator.cs
index 6509ebd..6eb6127 100644
--- a/src/Cellm/Services/ServiceLocator.cs
+++ b/src/Cellm/Services/ServiceLocator.cs
@@ -3,6 +3,7 @@
 using Cellm.Models;
 using Cellm.Models.Anthropic;
 using Cellm.Models.Google;
+using Cellm.Models.Llamafile;
 using Cellm.Models.OpenAi;
 using Cellm.Services.Configuration;
 using ExcelDna.Integration;
@@ -42,6 +43,7 @@ private static IServiceCollection ConfigureServices(IServiceCollection services)
             .Configure<AnthropicConfiguration>(configuration.GetRequiredSection(nameof(AnthropicConfiguration)))
             .Configure<GoogleConfiguration>(configuration.GetRequiredSection(nameof(GoogleConfiguration)))
             .Configure<OpenAiConfiguration>(configuration.GetRequiredSection(nameof(OpenAiConfiguration)))
+            .Configure<LlamafileConfiguration>(configuration.GetRequiredSection(nameof(LlamafileConfiguration)))
             .Configure<RateLimiterConfiguration>(configuration.GetRequiredSection(nameof(RateLimiterConfiguration)))
             .Configure<CircuitBreakerConfiguration>(configuration.GetRequiredSection(nameof(CircuitBreakerConfiguration)))
             .Configure<RetryConfiguration>(configuration.GetRequiredSection(nameof(RetryConfiguration)))
@@ -83,7 +85,8 @@ private static IServiceCollection ConfigureServices(IServiceCollection services)
             .AddSingleton<IClientFactory, ClientFactory>()
             .AddSingleton<AnthropicClient>()
             .AddSingleton<GoogleClient>()
-            .AddSingleton<OpenAiClient>();
+            .AddSingleton<OpenAiClient>()
+            .AddSingleton<LlamafileClient>();
 
         // Model Providers
         var rateLimiterConfiguration = configuration.GetRequiredSection(nameof(RateLimiterConfiguration)).Get<RateLimiterConfiguration>()
@@ -125,6 +128,8 @@ private static IServiceCollection ConfigureServices(IServiceCollection services)
             openAiHttpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {openAiConfiguration.ApiKey}");
         }).AddResilienceHandler("OpenAiResiliencePipeline", resiliencePipelineConfigurator.ConfigureResiliencePipeline);
 
+        services.AddSingleton<LLamafileProcessManager>();
+
         return services;
     }
 }
diff --git a/src/Cellm/appsettings.Local.Llamafile.GPU.json b/src/Cellm/appsettings.Local.Llamafile.GPU.json
new file mode 100644
index 0000000..fd5df78
--- /dev/null
+++ b/src/Cellm/appsettings.Local.Llamafile.GPU.json
@@ -0,0 +1,18 @@
+{
+  "LlamafileConfiguration": {
+    "LlamafileUrl": "https://github.com/Mozilla-Ocho/llamafile/releases/download/0.8.13/llamafile-0.8.13",
+    "DefaultModel": "qwen-2.5-3b-instruct-q6-k-l",
+    "Models": {
+      "qwen-2.5-3b-instruct-q6-k-l": "https://huggingface.co/bartowski/Qwen2.5-3B-Instruct-GGUF/resolve/main/Qwen2.5-3B-Instruct-Q6_K_L.gguf"
+    },
+    "Port": 22195,
+    "GPU": true,
+    "GpuLayers": 999
+  },
+  "OpenAiConfiguration": {
+    "BaseAddress": "http://localhost:22195"
+  },
+  "CellmConfiguration": {
+    "DefaultModelProvider": "Llamafile"
+  }
+}
diff --git a/src/Cellm/appsettings.Local.Llamafile.json b/src/Cellm/appsettings.Local.Llamafile.json
new file mode 100644
index 0000000..8c8014f
--- /dev/null
+++ b/src/Cellm/appsettings.Local.Llamafile.json
@@ -0,0 +1,16 @@
+{
+  "LlamafileConfiguration": {
+    "LlamafileUrl": "https://github.com/Mozilla-Ocho/llamafile/releases/download/0.8.13/llamafile-0.8.13",
+    "DefaultModel": "qwen-2.5-3b-instruct-q6-k-l",
+    "Models": {
+      "qwen-2.5-3b-instruct-q6-k-l": "https://huggingface.co/bartowski/Qwen2.5-3B-Instruct-GGUF/resolve/main/Qwen2.5-3B-Instruct-Q6_K_L.gguf"
+    },
+    "Port": 22195
+  },
+  "OpenAiConfiguration": {
+    "BaseAddress": "http://localhost:22195"
+  },
+  "CellmConfiguration": {
+    "DefaultModelProvider": "Llamafile"
+  }
+}
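
A minimal standalone sketch (not part of the diff) of the lazy-bootstrap behaviour `LlamafileClient.Send` relies on: the `AsyncLazy<T>` factory runs at most once, on the first await, and concurrent awaiters observe the same completed task. It assumes the `AsyncLazy<T>` type added above is compiled alongside it; the demo class name, the 500 ms delay, and the printed address are illustrative stand-ins for downloading the llamafile and starting the server.

```csharp
using System;
using System.Threading.Tasks;

public static class AsyncLazyDemo
{
    public static async Task Main()
    {
        // The factory is invoked at most once, on the first await.
        var serverAddress = new AsyncLazy<string>(async () =>
        {
            Console.WriteLine("Bootstrapping once...");
            await Task.Delay(500); // stand-in for downloading the llamafile and starting the server
            return "http://localhost:22195";
        });

        // Concurrent awaiters share the same underlying Lazy<Task<string>>,
        // so "Bootstrapping once..." is printed a single time.
        var results = await Task.WhenAll(
            Task.Run(async () => await serverAddress),
            Task.Run(async () => await serverAddress));

        Console.WriteLine($"{results[0]} / {results[1]}");
    }
}
```

This is why `Send` can unconditionally await `_llamafilePath`, `_llamafileModelPath`, and `_llamafileProcess` on every call: only the first call pays the download and startup cost, while later calls pass straight through to the OpenAI-compatible client.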