From 701d2af6a7bebe47591d9e80e028d7954e2feee9 Mon Sep 17 00:00:00 2001
From: Kasper Marstal
Date: Sat, 21 Sep 2024 09:48:18 +0200
Subject: [PATCH] models: Revert Llamafile model to gemma 2 2b; Simplify system prompt, small models are less capable of following instructions, larger models will get it anyway (#5)

---
 src/Cellm/AddIn/Prompts/CellmPrompts.cs        | 14 +++++---------
 src/Cellm/AddIn/Prompts/PromptBuilder.cs       |  5 -----
 src/Cellm/Models/Llamafile/LlamafileClient.cs  | 13 +++++++------
 src/Cellm/appsettings.Local.Llamafile.GPU.json |  5 ++---
 src/Cellm/appsettings.Local.Llamafile.json     |  5 ++---
 5 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/src/Cellm/AddIn/Prompts/CellmPrompts.cs b/src/Cellm/AddIn/Prompts/CellmPrompts.cs
index 0953d26..c3fd445 100644
--- a/src/Cellm/AddIn/Prompts/CellmPrompts.cs
+++ b/src/Cellm/AddIn/Prompts/CellmPrompts.cs
@@ -4,28 +4,24 @@ internal static class CellmPrompts
 {
     public const string SystemMessage = @"
-The user has called you via the ""Prompt"" Excel function in a cell formula.
-The argument to the formula is the range of cells the user selected, e.g. ""=Prompt(A1)"" or ""=Prompt(A1:D10)"".
-The cells are rendered as a table where each cell conist of its coordinate and value.
+The user has called you via an Excel formula.
+The Excel sheet is rendered as a table where each cell consists of its coordinate and value.
 The table is your context and you must use it when following the user's instructions.
 
-You can only solve tasks that return data suitable for a single cell in a spreadsheet and in a format that is plain text or a numeric value.
 If you cannot find any instructions, or you cannot follow user's instructions in a cell-appropriate format, reply with ""#INSTRUCTION_ERROR?"" and nothing else.
 
-Return ONLY the result of following the user's instructions.
-The result must be one of the following:
+Return ONLY the result of following the user's instructions as plain text without formatting.
+Your response MUST be EITHER:
 
 - A single word or number
 - A comma-separated list of words or numbers
-- A brief sentence
+- A sentence
 
-Be concise. Cells have limited visible space. Do not provide explanations, steps, or engage in conversation.
-Ensure the output is directly usable in a spreadsheet cell.
"; diff --git a/src/Cellm/AddIn/Prompts/PromptBuilder.cs b/src/Cellm/AddIn/Prompts/PromptBuilder.cs index 4f70f81..68b2701 100644 --- a/src/Cellm/AddIn/Prompts/PromptBuilder.cs +++ b/src/Cellm/AddIn/Prompts/PromptBuilder.cs @@ -21,11 +21,6 @@ public PromptBuilder(Prompt prompt) public PromptBuilder SetSystemMessage(string systemMessage) { - if (_messages.Any(x => x.Role.Equals(Role.System))) - { - throw new CellmException("Cannot set system message when messages already has one"); - } - _systemMessage = systemMessage; return this; } diff --git a/src/Cellm/Models/Llamafile/LlamafileClient.cs b/src/Cellm/Models/Llamafile/LlamafileClient.cs index 3fd370b..357afbb 100644 --- a/src/Cellm/Models/Llamafile/LlamafileClient.cs +++ b/src/Cellm/Models/Llamafile/LlamafileClient.cs @@ -42,7 +42,7 @@ public LlamafileClient(IOptions cellmConfiguration, _llamafileModelPath = new AsyncLazy(async () => { - return await DownloadFile(_llamafileConfiguration.Models[_llamafileConfiguration.DefaultModel], $"Llamafile-{_llamafileConfiguration.DefaultModel}", httpClient); + return await DownloadFile(_llamafileConfiguration.Models[_llamafileConfiguration.DefaultModel], $"Llamafile-model-weights-{_llamafileConfiguration.DefaultModel}", httpClient); }); _llamafileProcess = new AsyncLazy(async () => @@ -79,7 +79,8 @@ private async Task StartProcess() try { - await WaitForLlamafile(process); + Thread.Sleep(5000); + // await WaitForLlamafile(process); _llamafileProcessManager.AssignProcessToCellm(process); return process; } @@ -127,7 +128,7 @@ private async Task WaitForLlamafile(Process process) var cancellationTokenSource = new CancellationTokenSource(TimeSpan.FromSeconds(1)); var startTime = DateTime.UtcNow; - // 30 seconds timeout + // Max 30 seconds timeout while ((DateTime.UtcNow - startTime).TotalSeconds < 30) { if (process.HasExited) @@ -144,14 +145,14 @@ private async Task WaitForLlamafile(Process process) return; } } - catch (TaskCanceledException) + catch (HttpRequestException) { } - catch (HttpRequestException) + catch (TaskCanceledException) { } - // Wait for before next attempt + // Wait before next attempt await Task.Delay(500); } diff --git a/src/Cellm/appsettings.Local.Llamafile.GPU.json b/src/Cellm/appsettings.Local.Llamafile.GPU.json index ddf5711..10a9a46 100644 --- a/src/Cellm/appsettings.Local.Llamafile.GPU.json +++ b/src/Cellm/appsettings.Local.Llamafile.GPU.json @@ -1,10 +1,9 @@ { "LlamafileConfiguration": { "LlamafileUrl": "https://github.com/Mozilla-Ocho/llamafile/releases/download/0.8.13/llamafile-0.8.13", - "DefaultModel": "qwen-2.5-3b-instruct-q6-k-l", + "DefaultModel": "gemma-2-2b-instruct-q6-k", "Models": { - "qwen-2.5-3b-instruct-q6-k-l": "https://huggingface.co/bartowski/Qwen2.5-3B-Instruct-GGUF/resolve/main/Qwen2.5-3B-Instruct-Q6_K_L.gguf?download=true", - "qwen-2.5-0.5b-instruct-q6-k-l": "https://huggingface.co/bartowski/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/Qwen2.5-0.5B-Instruct-Q6_K_L.gguf?download=true" + "gemma-2-2b-instruct-q6-k": "https://huggingface.co/bartowski/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q6_K.gguf" }, "Port": 22195, "GPU": true, diff --git a/src/Cellm/appsettings.Local.Llamafile.json b/src/Cellm/appsettings.Local.Llamafile.json index 53632f3..f45e147 100644 --- a/src/Cellm/appsettings.Local.Llamafile.json +++ b/src/Cellm/appsettings.Local.Llamafile.json @@ -1,10 +1,9 @@ { "LlamafileConfiguration": { "LlamafileUrl": "https://github.com/Mozilla-Ocho/llamafile/releases/download/0.8.13/llamafile-0.8.13", - "DefaultModel": "qwen-2.5-3b-instruct-q6-k-l", + 
"DefaultModel": "gemma-2-2b-instruct-q6-k", "Models": { - "qwen-2.5-3b-instruct-q6-k-l": "https://huggingface.co/bartowski/Qwen2.5-3B-Instruct-GGUF/resolve/main/Qwen2.5-3B-Instruct-Q6_K_L.gguf?download=true", - "qwen-2.5-0.5b-instruct-q6-k-l": "https://huggingface.co/bartowski/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/Qwen2.5-0.5B-Instruct-Q6_K_L.gguf?download=true" + "gemma-2-2b-instruct-q6-k": "https://huggingface.co/bartowski/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q6_K.gguf" }, "Port": 22195 },