From 701d2af6a7bebe47591d9e80e028d7954e2feee9 Mon Sep 17 00:00:00 2001
From: Kasper Marstal
Date: Sat, 21 Sep 2024 09:48:18 +0200
Subject: [PATCH] models: Revert Llamafile model to gemma 2 2b; Simplify system prompt, small models are less capable of following instructions, larger models will get it anyway (#5)

---
 src/Cellm/AddIn/Prompts/CellmPrompts.cs        | 14 +++++---------
 src/Cellm/AddIn/Prompts/PromptBuilder.cs       |  5 -----
 src/Cellm/Models/Llamafile/LlamafileClient.cs  | 13 +++++++------
 src/Cellm/appsettings.Local.Llamafile.GPU.json |  5 ++---
 src/Cellm/appsettings.Local.Llamafile.json     |  5 ++---
 5 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/src/Cellm/AddIn/Prompts/CellmPrompts.cs b/src/Cellm/AddIn/Prompts/CellmPrompts.cs
index 0953d26..c3fd445 100644
--- a/src/Cellm/AddIn/Prompts/CellmPrompts.cs
+++ b/src/Cellm/AddIn/Prompts/CellmPrompts.cs
@@ -4,28 +4,24 @@ internal static class CellmPrompts
 {
     public const string SystemMessage = @"
-The user has called you via the ""Prompt"" Excel function in a cell formula.
-The argument to the formula is the range of cells the user selected, e.g. ""=Prompt(A1)"" or ""=Prompt(A1:D10)"".
-The cells are rendered as a table where each cell conist of its coordinate and value.
+The user has called you via an Excel formula.
+The Excel sheet is rendered as a table where each cell consists of its coordinate and value.
 The table is your context and you must use it when following the user's instructions.
 
-You can only solve tasks that return data suitable for a single cell in a spreadsheet and in a format that is plain text or a numeric value.
 If you cannot find any instructions, or you cannot follow user's instructions in a cell-appropriate format, reply with ""#INSTRUCTION_ERROR?"" and nothing else.
 
-Return ONLY the result of following the user's instructions.
-The result must be one of the following:
+Return ONLY the result of following the user's instructions as plain text without formatting.
+Your response MUST be EITHER:
 
 - A single word or number
 - A comma-separated list of words or numbers
-- A brief sentence
+- A sentence
 
-Be concise. Cells have limited visible space. Do not provide explanations, steps, or engage in conversation.
-Ensure the output is directly usable in a spreadsheet cell.
"; diff --git a/src/Cellm/AddIn/Prompts/PromptBuilder.cs b/src/Cellm/AddIn/Prompts/PromptBuilder.cs index 4f70f81..68b2701 100644 --- a/src/Cellm/AddIn/Prompts/PromptBuilder.cs +++ b/src/Cellm/AddIn/Prompts/PromptBuilder.cs @@ -21,11 +21,6 @@ public PromptBuilder(Prompt prompt) public PromptBuilder SetSystemMessage(string systemMessage) { - if (_messages.Any(x => x.Role.Equals(Role.System))) - { - throw new CellmException("Cannot set system message when messages already has one"); - } - _systemMessage = systemMessage; return this; } diff --git a/src/Cellm/Models/Llamafile/LlamafileClient.cs b/src/Cellm/Models/Llamafile/LlamafileClient.cs index 3fd370b..357afbb 100644 --- a/src/Cellm/Models/Llamafile/LlamafileClient.cs +++ b/src/Cellm/Models/Llamafile/LlamafileClient.cs @@ -42,7 +42,7 @@ public LlamafileClient(IOptions cellmConfiguration, _llamafileModelPath = new AsyncLazy(async () => { - return await DownloadFile(_llamafileConfiguration.Models[_llamafileConfiguration.DefaultModel], $"Llamafile-{_llamafileConfiguration.DefaultModel}", httpClient); + return await DownloadFile(_llamafileConfiguration.Models[_llamafileConfiguration.DefaultModel], $"Llamafile-model-weights-{_llamafileConfiguration.DefaultModel}", httpClient); }); _llamafileProcess = new AsyncLazy(async () => @@ -79,7 +79,8 @@ private async Task StartProcess() try { - await WaitForLlamafile(process); + Thread.Sleep(5000); + // await WaitForLlamafile(process); _llamafileProcessManager.AssignProcessToCellm(process); return process; } @@ -127,7 +128,7 @@ private async Task WaitForLlamafile(Process process) var cancellationTokenSource = new CancellationTokenSource(TimeSpan.FromSeconds(1)); var startTime = DateTime.UtcNow; - // 30 seconds timeout + // Max 30 seconds timeout while ((DateTime.UtcNow - startTime).TotalSeconds < 30) { if (process.HasExited) @@ -144,14 +145,14 @@ private async Task WaitForLlamafile(Process process) return; } } - catch (TaskCanceledException) + catch (HttpRequestException) { } - catch (HttpRequestException) + catch (TaskCanceledException) { } - // Wait for before next attempt + // Wait before next attempt await Task.Delay(500); } diff --git a/src/Cellm/appsettings.Local.Llamafile.GPU.json b/src/Cellm/appsettings.Local.Llamafile.GPU.json index ddf5711..10a9a46 100644 --- a/src/Cellm/appsettings.Local.Llamafile.GPU.json +++ b/src/Cellm/appsettings.Local.Llamafile.GPU.json @@ -1,10 +1,9 @@ { "LlamafileConfiguration": { "LlamafileUrl": "https://github.com/Mozilla-Ocho/llamafile/releases/download/0.8.13/llamafile-0.8.13", - "DefaultModel": "qwen-2.5-3b-instruct-q6-k-l", + "DefaultModel": "gemma-2-2b-instruct-q6-k", "Models": { - "qwen-2.5-3b-instruct-q6-k-l": "https://huggingface.co/bartowski/Qwen2.5-3B-Instruct-GGUF/resolve/main/Qwen2.5-3B-Instruct-Q6_K_L.gguf?download=true", - "qwen-2.5-0.5b-instruct-q6-k-l": "https://huggingface.co/bartowski/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/Qwen2.5-0.5B-Instruct-Q6_K_L.gguf?download=true" + "gemma-2-2b-instruct-q6-k": "https://huggingface.co/bartowski/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q6_K.gguf" }, "Port": 22195, "GPU": true, diff --git a/src/Cellm/appsettings.Local.Llamafile.json b/src/Cellm/appsettings.Local.Llamafile.json index 53632f3..f45e147 100644 --- a/src/Cellm/appsettings.Local.Llamafile.json +++ b/src/Cellm/appsettings.Local.Llamafile.json @@ -1,10 +1,9 @@ { "LlamafileConfiguration": { "LlamafileUrl": "https://github.com/Mozilla-Ocho/llamafile/releases/download/0.8.13/llamafile-0.8.13", - "DefaultModel": "qwen-2.5-3b-instruct-q6-k-l", + 
"DefaultModel": "gemma-2-2b-instruct-q6-k", "Models": { - "qwen-2.5-3b-instruct-q6-k-l": "https://huggingface.co/bartowski/Qwen2.5-3B-Instruct-GGUF/resolve/main/Qwen2.5-3B-Instruct-Q6_K_L.gguf?download=true", - "qwen-2.5-0.5b-instruct-q6-k-l": "https://huggingface.co/bartowski/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/Qwen2.5-0.5B-Instruct-Q6_K_L.gguf?download=true" + "gemma-2-2b-instruct-q6-k": "https://huggingface.co/bartowski/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q6_K.gguf" }, "Port": 22195 },