feat: Add LlamafileClient

getcellm · Sep 19, 2024 · e49dd72 · e49dd72
1 parent 110589d
commit e49dd72
Show file tree

Hide file tree

Showing 8 changed files with 363 additions and 1 deletion.
diff --git a/src/Cellm/Models/ClientFactory.cs b/src/Cellm/Models/ClientFactory.cs
@@ -1,5 +1,6 @@
 using Cellm.Models.Anthropic;
 using Cellm.Models.Google;
+using Cellm.Models.Llamafile;
 using Cellm.Models.OpenAi;
 using Cellm.Services;
 
@@ -15,6 +16,7 @@ public IClient GetClient(string modelProvider)
             "anthropic" => ServiceLocator.Get<AnthropicClient>(),
             "google" => ServiceLocator.Get<GoogleClient>(),
             "openai" => ServiceLocator.Get<OpenAiClient>(),
+            "llamafile" => ServiceLocator.Get<LlamafileClient>(),
             _ => throw new ArgumentException($"Unsupported client type: {modelProvider}")
         };
     }

diff --git a/src/Cellm/Models/Llamafile/AsyncLazy.cs b/src/Cellm/Models/Llamafile/AsyncLazy.cs
@@ -0,0 +1,47 @@
+using System.Runtime.CompilerServices;
+
+/// <summary>
+/// Defers an initialization until it is first requested and runs it at most once on the thread pool.
+/// Instances can be awaited directly; every awaiter observes the same underlying task.
+/// </summary>
+/// <remarks>
+/// The task is cached as soon as it has been created, including when it later faults,
+/// so a failed initialization is not retried by subsequent awaits.
+/// </remarks>
+/// <typeparam name="T">The type of the value produced by the initialization.</typeparam>
+public sealed class AsyncLazy<T>
+{
+    /// <summary>
+    /// Creates the initialization task on first access and hands out the same task afterwards.
+    /// </summary>
+    private readonly Lazy<Task<T>> _lazyTask;
+
+    /// <summary>
+    /// Creates an <see cref="AsyncLazy{T}"/> from a synchronous initializer.
+    /// </summary>
+    /// <param name="factory">Delegate that produces the value; it is scheduled with <see cref="Task.Run(Func{T})"/> when the value is first needed.</param>
+    public AsyncLazy(Func<T> factory) =>
+        _lazyTask = new Lazy<Task<T>>(() => Task.Run(factory));
+
+    /// <summary>
+    /// Creates an <see cref="AsyncLazy{T}"/> from an asynchronous initializer.
+    /// </summary>
+    /// <param name="factory">Asynchronous delegate that produces the value; it is scheduled with <see cref="Task.Run(Func{Task{T}})"/> when the value is first needed.</param>
+    public AsyncLazy(Func<Task<T>> factory) =>
+        _lazyTask = new Lazy<Task<T>>(() => Task.Run(factory));
+
+    /// <summary>
+    /// Enables <c>await</c> directly on an <see cref="AsyncLazy{T}"/>. Triggers the initialization if it has not started yet.
+    /// </summary>
+    /// <returns>The awaiter of the underlying initialization task.</returns>
+    public TaskAwaiter<T> GetAwaiter() => _lazyTask.Value.GetAwaiter();
+
+    /// <summary>
+    /// Kicks off the initialization without waiting for it. Does nothing if it has already been started.
+    /// </summary>
+    public void Start() => _ = _lazyTask.Value;
+}
diff --git a/src/Cellm/Models/Llamafile/LLamafileProcessManager.cs b/src/Cellm/Models/Llamafile/LLamafileProcessManager.cs
@@ -0,0 +1,90 @@
+using System.Diagnostics;
+using System.Runtime.InteropServices;
+
+/// <summary>
+/// Owns a Windows job object and assigns child processes to it. The job is configured with
+/// JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE, so the operating system terminates the assigned processes
+/// when the last handle to the job is closed. This class never closes the handle itself.
+/// </summary>
+public class LLamafileProcessManager
+{
+    /// <summary>
+    /// JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE: terminate all processes associated with the job when the last job handle is closed.
+    /// </summary>
+    private const uint JobObjectLimitKillOnJobClose = 0x2000;
+
+    [DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)]
+    private static extern IntPtr CreateJobObject(IntPtr a, string lpName);
+
+    [DllImport("kernel32.dll", SetLastError = true)]
+    [return: MarshalAs(UnmanagedType.Bool)]
+    private static extern bool AssignProcessToJobObject(IntPtr job, IntPtr process);
+
+    [DllImport("kernel32.dll", SetLastError = true)]
+    [return: MarshalAs(UnmanagedType.Bool)]
+    private static extern bool SetInformationJobObject(IntPtr hJob, JobObjectInfoType infoType, IntPtr lpJobObjectInfo, uint cbJobObjectInfoLength);
+
+    // Managed mirror of the Win32 JOBOBJECT_BASIC_LIMIT_INFORMATION structure.
+    [StructLayout(LayoutKind.Sequential)]
+    struct JOBOBJECT_BASIC_LIMIT_INFORMATION
+    {
+        public Int64 PerProcessUserTimeLimit;
+        public Int64 PerJobUserTimeLimit;
+        public UInt32 LimitFlags;
+        public UIntPtr MinimumWorkingSetSize;
+        public UIntPtr MaximumWorkingSetSize;
+        public UInt32 ActiveProcessLimit;
+        public UIntPtr Affinity;
+        public UInt32 PriorityClass;
+        public UInt32 SchedulingClass;
+    }
+
+    // Managed mirror of the Win32 JOBOBJECT_EXTENDED_LIMIT_INFORMATION structure.
+    [StructLayout(LayoutKind.Sequential)]
+    struct JOBOBJECT_EXTENDED_LIMIT_INFORMATION
+    {
+        public JOBOBJECT_BASIC_LIMIT_INFORMATION BasicLimitInformation;
+        public IO_COUNTERS IoInfo;
+        public UIntPtr ProcessMemoryLimit;
+        public UIntPtr JobMemoryLimit;
+        public UIntPtr PeakProcessMemoryUsed;
+        public UIntPtr PeakJobMemoryUsed;
+    }
+
+    // Managed mirror of the Win32 IO_COUNTERS structure.
+    [StructLayout(LayoutKind.Sequential)]
+    struct IO_COUNTERS
+    {
+        public UInt64 ReadOperationCount;
+        public UInt64 WriteOperationCount;
+        public UInt64 OtherOperationCount;
+        public UInt64 ReadTransferCount;
+        public UInt64 WriteTransferCount;
+        public UInt64 OtherTransferCount;
+    }
+
+    // Information classes accepted by SetInformationJobObject.
+    enum JobObjectInfoType
+    {
+        AssociateCompletionPortInformation = 7,
+        BasicLimitInformation = 2,
+        BasicUIRestrictions = 4,
+        EndOfJobTimeInformation = 6,
+        ExtendedLimitInformation = 9,
+        SecurityLimitInformation = 5,
+        GroupInformation = 11
+    }
+
+    private readonly IntPtr _jobObject;
+
+    /// <summary>
+    /// Creates the job object and enables kill-on-close for it.
+    /// </summary>
+    /// <exception cref="System.ComponentModel.Win32Exception">The job object could not be created or configured.</exception>
+    public LLamafileProcessManager()
+    {
+        _jobObject = CreateJobObject(IntPtr.Zero, string.Empty);
+
+        if (_jobObject == IntPtr.Zero)
+        {
+            throw new System.ComponentModel.Win32Exception(Marshal.GetLastWin32Error(), "Failed to create job object for Llamafile processes");
+        }
+
+        var extendedInfo = new JOBOBJECT_EXTENDED_LIMIT_INFORMATION
+        {
+            BasicLimitInformation = new JOBOBJECT_BASIC_LIMIT_INFORMATION
+            {
+                LimitFlags = JobObjectLimitKillOnJobClose
+            }
+        };
+
+        var length = Marshal.SizeOf<JOBOBJECT_EXTENDED_LIMIT_INFORMATION>();
+        var extendedInfoPtr = Marshal.AllocHGlobal(length);
+
+        try
+        {
+            Marshal.StructureToPtr(extendedInfo, extendedInfoPtr, false);
+
+            if (!SetInformationJobObject(_jobObject, JobObjectInfoType.ExtendedLimitInformation, extendedInfoPtr, (uint)length))
+            {
+                throw new System.ComponentModel.Win32Exception(Marshal.GetLastWin32Error(), "Failed to configure job object for Llamafile processes");
+            }
+        }
+        finally
+        {
+            // Release the unmanaged copy even when marshalling or the native call fails.
+            Marshal.FreeHGlobal(extendedInfoPtr);
+        }
+    }
+
+    /// <summary>
+    /// Adds <paramref name="process"/> to the job object owned by this instance.
+    /// </summary>
+    /// <param name="process">A started process whose handle is assigned to the job.</param>
+    /// <exception cref="System.ComponentModel.Win32Exception">The process could not be assigned to the job object.</exception>
+    public void AssignProcessToCellm(Process process)
+    {
+        if (!AssignProcessToJobObject(_jobObject, process.Handle))
+        {
+            throw new System.ComponentModel.Win32Exception(Marshal.GetLastWin32Error(), "Failed to assign Llamafile process to job object");
+        }
+    }
+}
diff --git a/src/Cellm/Models/Llamafile/LlamafileClient.cs b/src/Cellm/Models/Llamafile/LlamafileClient.cs
@@ -0,0 +1,159 @@
+using System.Diagnostics;
+using Cellm.AddIn.Exceptions;
+using Cellm.AddIn.Prompts;
+using Cellm.AddIn;
+using Cellm.Models.OpenAi;
+using Microsoft.Extensions.Options;
+
+namespace Cellm.Models.Llamafile;
+
+/// <summary>
+/// Client for a locally hosted Llamafile server. On first use it downloads the Llamafile executable
+/// and the default model, starts the server process and waits for it to report healthy. Prompts are
+/// then forwarded through the OpenAI client.
+/// </summary>
+internal class LlamafileClient : IClient
+{
+    // Upper bound on how long the server may take to answer its first successful health check.
+    private static readonly TimeSpan StartupTimeout = TimeSpan.FromSeconds(30);
+
+    // Timeout applied to each individual health check request.
+    private static readonly TimeSpan HealthCheckTimeout = TimeSpan.FromSeconds(1);
+
+    // Pause between two consecutive health checks.
+    private static readonly TimeSpan HealthCheckInterval = TimeSpan.FromMilliseconds(500);
+
+    private readonly AsyncLazy<string> _llamafilePath;
+    private readonly AsyncLazy<string> _llamafileModelPath;
+    private readonly AsyncLazy<Process> _llamafileProcess;
+
+    private readonly CellmConfiguration _cellmConfiguration;
+    private readonly LlamafileConfiguration _llamafileConfiguration;
+    private readonly OpenAiConfiguration _openAiConfiguration;
+
+    private readonly IClient _openAiClient;
+    private readonly HttpClient _httpClient;
+    private readonly LLamafileProcessManager _llamafileProcessManager;
+
+    /// <summary>
+    /// Wires up configuration and collaborators. Nothing is downloaded or started here;
+    /// the downloads and the server process are created lazily on the first call to <see cref="Send"/>.
+    /// </summary>
+    public LlamafileClient(
+        IOptions<CellmConfiguration> cellmConfiguration,
+        IOptions<LlamafileConfiguration> llamafileConfiguration,
+        IOptions<OpenAiConfiguration> openAiConfiguration,
+        IClientFactory clientFactory,
+        HttpClient httpClient,
+        LLamafileProcessManager llamafileProcessManager)
+    {
+        _cellmConfiguration = cellmConfiguration.Value;
+        _llamafileConfiguration = llamafileConfiguration.Value;
+        _openAiConfiguration = openAiConfiguration.Value;
+        _openAiClient = clientFactory.GetClient("openai");
+        _httpClient = httpClient;
+        _llamafileProcessManager = llamafileProcessManager;
+
+        _llamafilePath = new AsyncLazy<string>(async () =>
+        {
+            return await DownloadFile(_llamafileConfiguration.LlamafileUrl, "Llamafile.exe", httpClient);
+        });
+
+        _llamafileModelPath = new AsyncLazy<string>(async () =>
+        {
+            var modelName = _llamafileConfiguration.DefaultModel;
+
+            // Report a missing model as a configuration problem instead of a bare KeyNotFoundException.
+            if (!_llamafileConfiguration.Models.TryGetValue(modelName, out var modelUri))
+            {
+                throw new CellmException($"Llamafile model '{modelName}' is not listed in the configured models");
+            }
+
+            return await DownloadFile(modelUri, $"Llamafile-{modelName}", httpClient);
+        });
+
+        _llamafileProcess = new AsyncLazy<Process>(async () =>
+        {
+            return await StartProcess();
+        });
+    }
+
+    /// <summary>
+    /// Ensures the executable, the model and the server process are available, then delegates the
+    /// prompt to the OpenAI client.
+    /// </summary>
+    /// <param name="prompt">The prompt to send.</param>
+    /// <param name="provider">Passed through unchanged to the OpenAI client.</param>
+    /// <param name="model">Passed through unchanged to the OpenAI client.</param>
+    /// <returns>The prompt returned by the OpenAI client.</returns>
+    public async Task<Prompt> Send(Prompt prompt, string? provider, string? model)
+    {
+        await _llamafilePath;
+        await _llamafileModelPath;
+        await _llamafileProcess;
+        return await _openAiClient.Send(prompt, provider, model);
+    }
+
+    /// <summary>
+    /// Starts the Llamafile server, waits until it is healthy and assigns it to the process manager.
+    /// </summary>
+    /// <returns>The running server process.</returns>
+    /// <exception cref="CellmException">The process could not be started, exited early or did not become healthy in time.</exception>
+    private async Task<Process> StartProcess()
+    {
+        var processStartInfo = new ProcessStartInfo(await _llamafilePath);
+
+        // Quote the model path: it is located below the user's application data folder, which may contain spaces.
+        processStartInfo.Arguments += $"-m \"{await _llamafileModelPath}\" ";
+        processStartInfo.Arguments += $"--port {_llamafileConfiguration.Port} ";
+
+        if (!_cellmConfiguration.Debug)
+        {
+            processStartInfo.Arguments += "--disable-browser ";
+        }
+
+        if (_llamafileConfiguration.Gpu)
+        {
+            processStartInfo.Arguments += $"-ngl {_llamafileConfiguration.GpuLayers} ";
+        }
+
+        var process = Process.Start(processStartInfo) ?? throw new CellmException("Failed to start Llamafile server");
+
+        try
+        {
+            await WaitForLlamafile(process);
+            _llamafileProcessManager.AssignProcessToCellm(process);
+            return process;
+        }
+        catch
+        {
+            try
+            {
+                process.Kill();
+            }
+            catch (InvalidOperationException)
+            {
+                // The process is already gone; do not let the cleanup mask the original failure.
+            }
+
+            throw;
+        }
+    }
+
+    /// <summary>
+    /// Downloads <paramref name="uri"/> into the Cellm folder below the user's application data
+    /// directory unless a file named <paramref name="filename"/> already exists there.
+    /// </summary>
+    /// <param name="uri">Location to download from.</param>
+    /// <param name="filename">File name to store the download under.</param>
+    /// <param name="httpClient">Client used for the download.</param>
+    /// <returns>The full path of the local file.</returns>
+    private static async Task<string> DownloadFile(Uri uri, string filename, HttpClient httpClient)
+    {
+        var filePath = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), nameof(Cellm), filename);
+        Directory.CreateDirectory(Path.GetDirectoryName(filePath) ?? throw new CellmException("Failed to create Llamafile path"));
+
+        if (File.Exists(filePath))
+        {
+            return filePath;
+        }
+
+        // Download to a temporary ".part" file so that an interrupted download is never mistaken for a complete file.
+        var filePathPart = filePath + ".part";
+
+        if (File.Exists(filePathPart))
+        {
+            File.Delete(filePathPart);
+        }
+
+        using var response = await httpClient.GetAsync(uri, HttpCompletionOption.ResponseHeadersRead);
+        response.EnsureSuccessStatusCode();
+
+        using (var fileStream = File.Create(filePathPart))
+        using (var httpStream = await response.Content.ReadAsStreamAsync())
+        {
+            await httpStream.CopyToAsync(fileStream).ConfigureAwait(false);
+        }
+
+        File.Move(filePathPart, filePath);
+
+        return filePath;
+    }
+
+    /// <summary>
+    /// Polls the server's health endpoint until it answers with HTTP 200.
+    /// </summary>
+    /// <param name="process">The server process; polling stops as soon as it has exited.</param>
+    /// <exception cref="CellmException">The process exited or did not become healthy within the startup timeout.</exception>
+    private async Task WaitForLlamafile(Process process)
+    {
+        var stopwatch = Stopwatch.StartNew();
+
+        while (stopwatch.Elapsed < StartupTimeout)
+        {
+            if (process.HasExited)
+            {
+                throw new CellmException($"Failed to run Llamafile. Exit code: {process.ExitCode}");
+            }
+
+            try
+            {
+                // Each attempt needs its own token source. A single shared source stays cancelled after
+                // its first timeout, which would make every later request fail without reaching the server.
+                using var cancellationTokenSource = new CancellationTokenSource(HealthCheckTimeout);
+                using var response = await _httpClient.GetAsync($"{_openAiConfiguration.BaseAddress}/health", cancellationTokenSource.Token);
+
+                if (response.StatusCode == System.Net.HttpStatusCode.OK)
+                {
+                    return; // Server is healthy
+                }
+            }
+            catch (TaskCanceledException)
+            {
+                // The attempt timed out; the server is probably still starting. Try again.
+            }
+            catch (HttpRequestException)
+            {
+                // The server is not accepting connections yet. Try again.
+            }
+
+            await Task.Delay(HealthCheckInterval);
+        }
+
+        throw new CellmException("Timeout waiting for Llamafile server to be ready");
+    }
+}
+
diff --git a/src/Cellm/Models/Llamafile/LlamafileConfiguration.cs b/src/Cellm/Models/Llamafile/LlamafileConfiguration.cs
@@ -0,0 +1,25 @@
+namespace Cellm.Models.Llamafile;
+
+/// <summary>
+/// Settings for the Llamafile model provider, bound from the <c>LlamafileConfiguration</c> configuration section.
+/// </summary>
+internal class LlamafileConfiguration
+{
+    /// <summary>Location the Llamafile executable is downloaded from.</summary>
+    public Uri LlamafileUrl { get; init; } = default!;
+
+    /// <summary>Download locations of the available models, keyed by model name.</summary>
+    public Dictionary<string, Uri> Models { get; init; } = default!;
+
+    /// <summary>Name of the entry in <see cref="Models"/> that is downloaded and served.</summary>
+    public string DefaultModel { get; init; } = default!;
+
+    /// <summary>Value passed to the Llamafile server with the <c>--port</c> argument.</summary>
+    public ushort Port { get; init; }
+
+    /// <summary>Whether the <c>-ngl</c> argument is passed to the Llamafile server. Off unless configured.</summary>
+    public bool Gpu { get; init; }
+
+    /// <summary>Value passed with the <c>-ngl</c> argument when <see cref="Gpu"/> is enabled.</summary>
+    public int GpuLayers { get; init; } = 999;
+}
diff --git a/src/Cellm/Services/ServiceLocator.cs b/src/Cellm/Services/ServiceLocator.cs
@@ -3,6 +3,7 @@
 using Cellm.Models;
 using Cellm.Models.Anthropic;
 using Cellm.Models.Google;
+using Cellm.Models.Llamafile;
 using Cellm.Models.OpenAi;
 using Cellm.Services.Configuration;
 using ExcelDna.Integration;
@@ -42,6 +43,7 @@ private static IServiceCollection ConfigureServices(IServiceCollection services)
             .Configure<AnthropicConfiguration>(configuration.GetRequiredSection(nameof(AnthropicConfiguration)))
             .Configure<GoogleConfiguration>(configuration.GetRequiredSection(nameof(GoogleConfiguration)))
             .Configure<OpenAiConfiguration>(configuration.GetRequiredSection(nameof(OpenAiConfiguration)))
+            .Configure<LlamafileConfiguration>(configuration.GetRequiredSection(nameof(LlamafileConfiguration)))
             .Configure<RateLimiterConfiguration>(configuration.GetRequiredSection(nameof(RateLimiterConfiguration)))
             .Configure<CircuitBreakerConfiguration>(configuration.GetRequiredSection(nameof(CircuitBreakerConfiguration)))
             .Configure<RetryConfiguration>(configuration.GetRequiredSection(nameof(RetryConfiguration)))
@@ -83,7 +85,8 @@ private static IServiceCollection ConfigureServices(IServiceCollection services)
             .AddSingleton<IClientFactory, ClientFactory>()
             .AddSingleton<IClient, Client>()
             .AddSingleton<ICache, Cache>()
-            .AddSingleton<ISerde, Serde>();
+            .AddSingleton<ISerde, Serde>()
+            .AddSingleton<LLamafileProcessManager>();
 
         // Model Providers
         var rateLimiterConfiguration = configuration.GetRequiredSection(nameof(RateLimiterConfiguration)).Get<RateLimiterConfiguration>()
@@ -125,6 +128,8 @@ private static IServiceCollection ConfigureServices(IServiceCollection services)
             openAiHttpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {openAiConfiguration.ApiKey}");
         }).AddResilienceHandler("OpenAiResiliencePipeline", resiliencePipelineConfigurator.ConfigureResiliencePipeline);
 
+        services.AddSingleton<LlamafileClient>();
+
         return services;
     }
 }
diff --git a/src/Cellm/appsettings.Local.Llamafile.GPU.json b/src/Cellm/appsettings.Local.Llamafile.GPU.json
@@ -0,0 +1,18 @@
+{
+    "LlamafileConfiguration": {
+        "LlamafileUrl": "https://github.com/Mozilla-Ocho/llamafile/releases/download/0.8.13/llamafile-0.8.13",
+        "DefaultModel": "qwen-2.5-3b-instruct-q6-k-l",
+        "Models": {
+            "qwen-2.5-3b-instruct-q6-k-l": "https://huggingface.co/bartowski/Qwen2.5-3B-Instruct-GGUF/resolve/main/Qwen2.5-3B-Instruct-Q6_K_L.gguf"
+        },
+        "Port": 22195,
+        "GPU": true,
+        "GpuLayers": 999
+    },
+    "OpenAiConfiguration": {
+       "BaseAddress": "http://localhost:22195"
+    },
+    "CellmConfiguration": {
+        "DefaultModelProvider": "Llamafile"
+    }
+}