-
Notifications
You must be signed in to change notification settings - Fork 27
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
110589d
commit e49dd72
Showing
8 changed files
with
363 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
using System.Runtime.CompilerServices; | ||
|
||
/// <summary>
/// Provides asynchronous lazy initialization. The factory is scheduled with <see cref="Task.Run(Action)"/>
/// semantics the first time the instance is awaited or <see cref="Start"/> is called, and the
/// resulting task is cached so every later await observes the same result (or the same failure).
/// </summary>
/// <typeparam name="T">The type of object that is being asynchronously initialized.</typeparam>
public sealed class AsyncLazy<T>
{
    /// <summary>
    /// The underlying lazy task. <see cref="Lazy{T}"/> is created with its default thread-safety
    /// mode, so the factory is started at most once even under concurrent access.
    /// </summary>
    private readonly Lazy<Task<T>> instance;

    /// <summary>
    /// Initializes a new instance of the <see cref="AsyncLazy{T}"/> class.
    /// </summary>
    /// <param name="factory">The delegate that is invoked on a background thread to produce the value when it is needed.</param>
    /// <exception cref="ArgumentNullException"><paramref name="factory"/> is <c>null</c>.</exception>
    public AsyncLazy(Func<T> factory)
    {
        // Fail at construction time instead of on the first await, where the cause is harder to trace.
        if (factory is null)
        {
            throw new ArgumentNullException(nameof(factory));
        }

        instance = new Lazy<Task<T>>(() => Task.Run(factory));
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="AsyncLazy{T}"/> class.
    /// </summary>
    /// <param name="factory">The asynchronous delegate that is invoked on a background thread to produce the value when it is needed.</param>
    /// <exception cref="ArgumentNullException"><paramref name="factory"/> is <c>null</c>.</exception>
    public AsyncLazy(Func<Task<T>> factory)
    {
        if (factory is null)
        {
            throw new ArgumentNullException(nameof(factory));
        }

        instance = new Lazy<Task<T>>(() => Task.Run(factory));
    }

    /// <summary>
    /// Asynchronous infrastructure support. This method permits instances of <see cref="AsyncLazy{T}"/> to be awaited.
    /// Calling it starts the initialization if it has not started yet.
    /// </summary>
    /// <returns>The awaiter of the cached initialization task.</returns>
    public TaskAwaiter<T> GetAwaiter()
    {
        return instance.Value.GetAwaiter();
    }

    /// <summary>
    /// Starts the asynchronous initialization, if it has not already started.
    /// </summary>
    public void Start()
    {
        // Touching Value is what triggers the factory; the task itself is intentionally discarded.
        _ = instance.Value;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
using System.Diagnostics; | ||
using System.Runtime.InteropServices; | ||
|
||
/// <summary>
/// Owns a Windows job object configured with the kill-on-job-close limit and lets callers add
/// processes to it. The job handle is deliberately never closed by this class: it is released by
/// the operating system when the owning process ends, at which point the limit terminates every
/// process that was assigned to the job.
/// </summary>
/// <remarks>
/// All Win32 failures are reported through <see cref="Trace"/> rather than thrown, so a failure to
/// set up the job degrades to "child process is not tied to our lifetime" instead of breaking startup.
/// </remarks>
public class LLamafileProcessManager
{
    /// <summary>
    /// Win32 <c>JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE</c>: terminate all processes in the job when the
    /// last handle to the job is closed.
    /// </summary>
    private const uint JobObjectLimitKillOnJobClose = 0x2000;

    [DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)]
    static extern IntPtr CreateJobObject(IntPtr a, string? lpName);

    [DllImport("kernel32.dll", SetLastError = true)]
    static extern bool AssignProcessToJobObject(IntPtr job, IntPtr process);

    [DllImport("kernel32.dll", SetLastError = true)]
    static extern bool SetInformationJobObject(IntPtr hJob, JobObjectInfoType infoType, IntPtr lpJobObjectInfo, uint cbJobObjectInfoLength);

    [StructLayout(LayoutKind.Sequential)]
    struct JOBOBJECT_BASIC_LIMIT_INFORMATION
    {
        public Int64 PerProcessUserTimeLimit;
        public Int64 PerJobUserTimeLimit;
        public UInt32 LimitFlags;
        public UIntPtr MinimumWorkingSetSize;
        public UIntPtr MaximumWorkingSetSize;
        public UInt32 ActiveProcessLimit;
        public UIntPtr Affinity;
        public UInt32 PriorityClass;
        public UInt32 SchedulingClass;
    }

    [StructLayout(LayoutKind.Sequential)]
    struct JOBOBJECT_EXTENDED_LIMIT_INFORMATION
    {
        public JOBOBJECT_BASIC_LIMIT_INFORMATION BasicLimitInformation;
        public IO_COUNTERS IoInfo;
        public UIntPtr ProcessMemoryLimit;
        public UIntPtr JobMemoryLimit;
        public UIntPtr PeakProcessMemoryUsed;
        public UIntPtr PeakJobMemoryUsed;
    }

    [StructLayout(LayoutKind.Sequential)]
    struct IO_COUNTERS
    {
        public UInt64 ReadOperationCount;
        public UInt64 WriteOperationCount;
        public UInt64 OtherOperationCount;
        public UInt64 ReadTransferCount;
        public UInt64 WriteTransferCount;
        public UInt64 OtherTransferCount;
    }

    enum JobObjectInfoType
    {
        AssociateCompletionPortInformation = 7,
        BasicLimitInformation = 2,
        BasicUIRestrictions = 4,
        EndOfJobTimeInformation = 6,
        ExtendedLimitInformation = 9,
        SecurityLimitInformation = 5,
        GroupInformation = 11
    }

    /// <summary>Handle of the job object, or <see cref="IntPtr.Zero"/> when creation failed.</summary>
    private readonly IntPtr _jobObject;

    /// <summary>
    /// Creates an anonymous job object and applies the kill-on-job-close limit to it.
    /// </summary>
    public LLamafileProcessManager()
    {
        // A null name requests an unnamed job, so separate host processes never share one job.
        _jobObject = CreateJobObject(IntPtr.Zero, null);

        if (_jobObject == IntPtr.Zero)
        {
            int createError = Marshal.GetLastWin32Error();
            Trace.TraceWarning($"CreateJobObject failed with Win32 error {createError}; child processes will not be tied to this process.");
            return;
        }

        var extendedInfo = new JOBOBJECT_EXTENDED_LIMIT_INFORMATION
        {
            BasicLimitInformation = new JOBOBJECT_BASIC_LIMIT_INFORMATION
            {
                LimitFlags = JobObjectLimitKillOnJobClose
            }
        };

        int length = Marshal.SizeOf<JOBOBJECT_EXTENDED_LIMIT_INFORMATION>();
        IntPtr extendedInfoPtr = Marshal.AllocHGlobal(length);

        try
        {
            Marshal.StructureToPtr(extendedInfo, extendedInfoPtr, false);

            if (!SetInformationJobObject(_jobObject, JobObjectInfoType.ExtendedLimitInformation, extendedInfoPtr, (uint)length))
            {
                int setError = Marshal.GetLastWin32Error();
                Trace.TraceWarning($"SetInformationJobObject failed with Win32 error {setError}; kill-on-close limit is not active.");
            }
        }
        finally
        {
            // Always release the unmanaged buffer, even if marshalling or the native call throws.
            Marshal.FreeHGlobal(extendedInfoPtr);
        }
    }

    /// <summary>
    /// Adds <paramref name="process"/> to the job object so it is terminated together with the job.
    /// </summary>
    /// <param name="process">A started process whose handle can be obtained.</param>
    /// <exception cref="ArgumentNullException"><paramref name="process"/> is <c>null</c>.</exception>
    public void AssignProcessToCellm(Process process)
    {
        if (process is null)
        {
            throw new ArgumentNullException(nameof(process));
        }

        if (!AssignProcessToJobObject(_jobObject, process.Handle))
        {
            int assignError = Marshal.GetLastWin32Error();
            Trace.TraceWarning($"AssignProcessToJobObject failed with Win32 error {assignError}; process {process.Id} is not tied to this process.");
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
using System.Diagnostics; | ||
using Cellm.AddIn.Exceptions; | ||
using Cellm.AddIn.Prompts; | ||
using Cellm.AddIn; | ||
using Cellm.Models.OpenAi; | ||
using Microsoft.Extensions.Options; | ||
|
||
namespace Cellm.Models.Llamafile; | ||
|
||
/// <summary>
/// <see cref="IClient"/> implementation backed by a locally running Llamafile server.
/// On first use it downloads the Llamafile executable and the default model into the user's
/// application-data folder, starts the server process, waits until its health endpoint answers,
/// and then forwards prompts to the "openai" client obtained from the client factory.
/// </summary>
internal class LlamafileClient : IClient
{
    /// <summary>Total time the server is given to report healthy after the process starts.</summary>
    private static readonly TimeSpan StartupTimeout = TimeSpan.FromSeconds(30);

    /// <summary>Timeout applied to each individual health request.</summary>
    private static readonly TimeSpan HealthCheckTimeout = TimeSpan.FromSeconds(1);

    /// <summary>Pause between two consecutive health requests.</summary>
    private static readonly TimeSpan HealthCheckInterval = TimeSpan.FromMilliseconds(500);

    private readonly AsyncLazy<string> _llamafilePath;
    private readonly AsyncLazy<string> _llamafileModelPath;
    private readonly AsyncLazy<Process> _llamafileProcess;

    private readonly CellmConfiguration _cellmConfiguration;
    private readonly LlamafileConfiguration _llamafileConfiguration;
    private readonly OpenAiConfiguration _openAiConfiguration;

    private readonly IClient _openAiClient;
    private readonly HttpClient _httpClient;
    private readonly LLamafileProcessManager _llamafileProcessManager;

    /// <summary>
    /// Stores the dependencies and sets up the lazy download/start steps. Nothing is downloaded or
    /// started here; that happens the first time <see cref="Send"/> awaits the lazies.
    /// </summary>
    public LlamafileClient(
        IOptions<CellmConfiguration> cellmConfiguration,
        IOptions<LlamafileConfiguration> llamafileConfiguration,
        IOptions<OpenAiConfiguration> openAiConfiguration,
        IClientFactory clientFactory,
        HttpClient httpClient,
        LLamafileProcessManager llamafileProcessManager)
    {
        _cellmConfiguration = cellmConfiguration.Value;
        _llamafileConfiguration = llamafileConfiguration.Value;
        _openAiConfiguration = openAiConfiguration.Value;
        _openAiClient = clientFactory.GetClient("openai");
        _httpClient = httpClient;
        _llamafileProcessManager = llamafileProcessManager;

        _llamafilePath = new AsyncLazy<string>(async () =>
        {
            return await DownloadFile(_llamafileConfiguration.LlamafileUrl, "Llamafile.exe", httpClient);
        });

        _llamafileModelPath = new AsyncLazy<string>(async () =>
        {
            return await DownloadFile(_llamafileConfiguration.Models[_llamafileConfiguration.DefaultModel], $"Llamafile-{_llamafileConfiguration.DefaultModel}", httpClient);
        });

        _llamafileProcess = new AsyncLazy<Process>(async () =>
        {
            return await StartProcess();
        });
    }

    /// <summary>
    /// Ensures the executable, the model and the server process are available, then delegates the
    /// prompt to the OpenAI client.
    /// </summary>
    /// <param name="prompt">The prompt to send.</param>
    /// <param name="provider">Passed through unchanged to the OpenAI client.</param>
    /// <param name="model">Passed through unchanged to the OpenAI client.</param>
    /// <returns>The prompt returned by the OpenAI client.</returns>
    public async Task<Prompt> Send(Prompt prompt, string? provider, string? model)
    {
        await _llamafilePath;
        await _llamafileModelPath;
        await _llamafileProcess;
        return await _openAiClient.Send(prompt, provider, model);
    }

    /// <summary>
    /// Starts the Llamafile server with the configured model, port and GPU options, waits for it to
    /// become healthy and registers it with the process manager.
    /// </summary>
    /// <returns>The running server process.</returns>
    /// <exception cref="CellmException">The process could not be started, exited early, or never became healthy.</exception>
    private async Task<Process> StartProcess()
    {
        var processStartInfo = new ProcessStartInfo(await _llamafilePath);

        // Quote the model path: it lives under the user's AppData folder, which may contain spaces.
        processStartInfo.Arguments += $"-m \"{await _llamafileModelPath}\" ";
        processStartInfo.Arguments += $"--port {_llamafileConfiguration.Port} ";

        if (!_cellmConfiguration.Debug)
        {
            processStartInfo.Arguments += "--disable-browser ";
        }

        if (_llamafileConfiguration.Gpu)
        {
            processStartInfo.Arguments += $"-ngl {_llamafileConfiguration.GpuLayers} ";
        }

        var process = Process.Start(processStartInfo) ?? throw new CellmException("Failed to start Llamafile server");

        try
        {
            await WaitForLlamafile(process);
            _llamafileProcessManager.AssignProcessToCellm(process);
            return process;
        }
        catch
        {
            // The failure may be that the process already exited; only kill what is still running
            // so the original exception is the one that propagates.
            if (!process.HasExited)
            {
                process.Kill();
            }

            process.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Downloads <paramref name="uri"/> into the Cellm folder under the user's application data
    /// directory unless a file named <paramref name="filename"/> already exists there.
    /// </summary>
    /// <param name="uri">The address to download from.</param>
    /// <param name="filename">The file name to store the download under.</param>
    /// <param name="httpClient">The client used for the request.</param>
    /// <returns>The full path of the (existing or freshly downloaded) file.</returns>
    private static async Task<string> DownloadFile(Uri uri, string filename, HttpClient httpClient)
    {
        var filePath = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), nameof(Cellm), filename);
        Directory.CreateDirectory(Path.GetDirectoryName(filePath) ?? throw new CellmException("Failed to create Llamafile path"));

        if (File.Exists(filePath))
        {
            return filePath;
        }

        // Download to a ".part" file first so an interrupted download is never mistaken for a complete one.
        var filePathPart = filePath + ".part";

        if (File.Exists(filePathPart))
        {
            File.Delete(filePathPart);
        }

        using (var response = await httpClient.GetAsync(uri, HttpCompletionOption.ResponseHeadersRead))
        {
            response.EnsureSuccessStatusCode();

            using (var fileStream = File.Create(filePathPart))
            using (var httpStream = await response.Content.ReadAsStreamAsync())
            {
                await httpStream.CopyToAsync(fileStream).ConfigureAwait(false);
            }
        }

        // The streams are closed at this point, so the finished file can be renamed into place.
        File.Move(filePathPart, filePath);

        return filePath;
    }

    /// <summary>
    /// Polls the server's <c>/health</c> endpoint until it answers with HTTP 200.
    /// </summary>
    /// <param name="process">The server process; polling stops early if it exits.</param>
    /// <exception cref="CellmException">The process exited, or the server was not healthy within the startup timeout.</exception>
    private async Task WaitForLlamafile(Process process)
    {
        var elapsed = Stopwatch.StartNew();

        while (elapsed.Elapsed < StartupTimeout)
        {
            if (process.HasExited)
            {
                throw new CellmException($"Failed to run Llamafile. Exit code: {process.ExitCode}");
            }

            try
            {
                // A fresh token per attempt: a single shared 1-second token would stay cancelled
                // after the first second and make every later request fail immediately.
                using var attemptTimeout = new CancellationTokenSource(HealthCheckTimeout);
                using var response = await _httpClient.GetAsync($"{_openAiConfiguration.BaseAddress}/health", attemptTimeout.Token);

                if (response.StatusCode == System.Net.HttpStatusCode.OK)
                {
                    return; // Server is healthy
                }
            }
            catch (OperationCanceledException)
            {
                // This attempt timed out; the server is probably still loading. Try again.
            }
            catch (HttpRequestException)
            {
                // The server is not accepting connections yet. Try again.
            }

            await Task.Delay(HealthCheckInterval);
        }

        throw new CellmException("Timeout waiting for Llamafile server to be ready");
    }
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
namespace Cellm.Models.Llamafile; | ||
|
||
/// <summary>
/// Settings for the Llamafile provider. Reference-typed settings have no usable default and are
/// expected to be supplied through object initialization; until then they are <c>null</c>.
/// </summary>
internal class LlamafileConfiguration
{
    /// <summary>Address the Llamafile executable is downloaded from.</summary>
    public Uri LlamafileUrl { get; init; } = default!;

    /// <summary>Download addresses of the available models, keyed by model name.</summary>
    public Dictionary<string, Uri> Models { get; init; } = default!;

    /// <summary>Key into <see cref="Models"/> selecting the model to use.</summary>
    public string DefaultModel { get; init; } = default!;

    /// <summary>Port number for the Llamafile server. Defaults to 0 when not set.</summary>
    public ushort Port { get; init; }

    /// <summary>Whether GPU offloading is requested. Defaults to <c>false</c>.</summary>
    public bool Gpu { get; init; } = false;

    /// <summary>Number of layers to offload when <see cref="Gpu"/> is enabled. Defaults to 999.</summary>
    public int GpuLayers { get; init; } = 999;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
{ | ||
"LlamafileConfiguration": { | ||
"LlamafileUrl": "https://github.com/Mozilla-Ocho/llamafile/releases/download/0.8.13/llamafile-0.8.13", | ||
"DefaultModel": "qwen-2.5-3b-instruct-q6-k-l", | ||
"Models": { | ||
"qwen-2.5-3b-instruct-q6-k-l": "https://huggingface.co/bartowski/Qwen2.5-3B-Instruct-GGUF/resolve/main/Qwen2.5-3B-Instruct-Q6_K_L.gguf" | ||
}, | ||
"Port": 22195, | ||
"GPU": true, | ||
"GpuLayers": 999 | ||
}, | ||
"OpenAiConfiguration": { | ||
"BaseAddress": "http://localhost:22195" | ||
}, | ||
"CellmConfiguration": { | ||
"DefaultModelProvider": "Llamafile" | ||
} | ||
} |
Oops, something went wrong.