Addressed: #157 (comment)
Addressed: #142

Regarding the Ollama service and context size, I implemented a dynamic way of setting the num_ctx parameter. Until now there was a fixed "maximum" context size of 100,000 tokens. That's huge, and most systems won't handle it.
The new approach checks how many tokens the prompt needs (estimated for Q4_0 quantization), adds a 1,024-token buffer for the response (more than enough, in my view, for the JSON response), and passes the calculated num_ctx to the Ollama API call.
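A minimal sketch of the calculation this commit introduces (the logic mirrors the calculatePromptTokenCount and calculateNumCtx helpers in the diff below; the prompt length is just an example value):

    // Estimate prompt tokens (~4 characters per token), add a response buffer, cap at 128k
    const prompt = 'x'.repeat(20000);                        // example: a 20,000-character prompt
    const promptTokenCount = Math.ceil(prompt.length / 4);   // => 5000 tokens
    const expectedResponseTokens = 1024;                     // buffer for the JSON response
    const numCtx = Math.min(promptTokenCount + expectedResponseTokens, 128000);
    console.log(numCtx);                                     // 6024 instead of a fixed 100,000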
clusterzx committed Jan 16, 2025
1 parent 7eb4989 commit 34c25d6
Showing 5 changed files with 165 additions and 63 deletions.
10 changes: 9 additions & 1 deletion config/config.js
@@ -11,7 +11,7 @@ console.log('Loaded environment variables:', {
});

module.exports = {
PAPERLESS_AI_VERSION: '2.1.9',
PAPERLESS_AI_VERSION: '2.2.0',
CONFIGURED: false,
predefinedMode: process.env.PROCESS_PREDEFINED_DOCUMENTS,
paperless: {
@@ -42,4 +42,12 @@ module.exports = {
"document_date": "YYYY-MM-DD",
"language": "en/de/es/..."
}`,
mustHavePrompt: ` Return the result EXCLUSIVELY as a JSON object. The Tags and Title MUST be in the language that is used in the document.:
{
"title": "xxxxx",
"correspondent": "xxxxxxxx",
"tags": ["Tag1", "Tag2", "Tag3", "Tag4"],
"document_date": "YYYY-MM-DD",
"language": "en/de/es/..."
}`,
};
34 changes: 33 additions & 1 deletion services/manualService.js
@@ -68,6 +68,37 @@ class ManualService {
async _analyzeOllama(content, existingTags) {
try {
const prompt = process.env.SYSTEM_PROMPT;

const getAvailableMemory = async () => {
const totalMemory = os.totalmem();
const freeMemory = os.freemem();
const totalMemoryMB = (totalMemory / (1024 * 1024)).toFixed(0);
const freeMemoryMB = (freeMemory / (1024 * 1024)).toFixed(0);
return { totalMemoryMB, freeMemoryMB };
};

const calculateNumCtx = (promptTokenCount, expectedResponseTokens) => {
const totalTokenUsage = promptTokenCount + expectedResponseTokens;
const maxCtxLimit = 128000;

const numCtx = Math.min(totalTokenUsage, maxCtxLimit);

console.log('Prompt Token Count:', promptTokenCount);
console.log('Expected Response Tokens:', expectedResponseTokens);
console.log('Dynamic calculated num_ctx:', numCtx);

return numCtx;
};

const calculatePromptTokenCount = (prompt) => {
return Math.ceil(prompt.length / 4);
};

const { freeMemoryMB } = await getAvailableMemory();
const expectedResponseTokens = 1024;
const promptTokenCount = calculatePromptTokenCount(prompt);

const numCtx = calculateNumCtx(promptTokenCount, expectedResponseTokens);

const response = await this.ollama.post(`${config.ollama.apiUrl}/api/generate`, {
model: config.ollama.model,
@@ -76,7 +107,8 @@ class ManualService {
options: {
temperature: 0.7,
top_p: 0.9,
repeat_penalty: 1.1
repeat_penalty: 1.1,
num_ctx: numCtx,
}
});

178 changes: 120 additions & 58 deletions services/ollamaService.js
@@ -3,6 +3,7 @@ const config = require('../config/config');
const fs = require('fs').promises;
const path = require('path');
const paperlessService = require('./paperlessService');
const os = require('os');

class OllamaService {
constructor() {
@@ -34,8 +35,37 @@ class OllamaService {
await fs.writeFile(cachePath, thumbnailData);
}

await this.writePromptToFile(prompt);

const getAvailableMemory = async () => {
const totalMemory = os.totalmem();
const freeMemory = os.freemem();
const totalMemoryMB = (totalMemory / (1024 * 1024)).toFixed(0);
const freeMemoryMB = (freeMemory / (1024 * 1024)).toFixed(0);
return { totalMemoryMB, freeMemoryMB };
};

const calculateNumCtx = (promptTokenCount, expectedResponseTokens) => {
const totalTokenUsage = promptTokenCount + expectedResponseTokens;
const maxCtxLimit = 128000;

const numCtx = Math.min(totalTokenUsage, maxCtxLimit);

console.log('Prompt Token Count:', promptTokenCount);
console.log('Expected Response Tokens:', expectedResponseTokens);
console.log('Dynamic calculated num_ctx:', numCtx);

return numCtx;
};

const calculatePromptTokenCount = (prompt) => {
return Math.ceil(prompt.length / 4);
};

const { freeMemoryMB } = await getAvailableMemory();
const expectedResponseTokens = 1024;
const promptTokenCount = calculatePromptTokenCount(prompt);

const numCtx = calculateNumCtx(promptTokenCount, expectedResponseTokens);
const response = await this.client.post(`${this.apiUrl}/api/generate`, {
model: this.model,
prompt: prompt,
@@ -44,65 +74,66 @@ class OllamaService {
YOU MUSTNOT: Ask for additional information or clarification, or ask questions about the document, or ask for additional context.
YOU MUSTNOT: Return a response without the desired JSON format.
YOU MUST: Analyze the document content and extract the following information into this structured JSON format and only this format!: {
"title": "xxxxx",
"correspondent": "xxxxxxxx",
"tags": ["Tag1", "Tag2", "Tag3", "Tag4"],
"document_date": "YYYY-MM-DD",
"language": "en/de/es/..."
}
ALWAYS USE THE INFORMATION TO FILL OUT THE JSON OBJECT. DO NOT ASK BACK QUESTIONS.
`,
stream: false,
options: {
temperature: 0.7,
top_p: 0.9,
repeat_penalty: 1.1,
top_k: 7,
num_predict: 256,
num_ctx: 100000
}
// options: {
// temperature: 0.3, // Moderately low for balance between consistency and creativity
// top_p: 0.7, // More reasonable value to allow sufficient token diversity
// repeat_penalty: 1.1, // Return to original value as 1.2 might be too restrictive
// top_k: 40, // Increased from 10 to allow more token options
// num_predict: 512, // Reduced from 1024 to a more stable value
// num_ctx: 2048 // Reduced context window for more stable processing
// }
});

if (!response.data || !response.data.response) {
throw new Error('Invalid response from Ollama API');
}

const parsedResponse = this._parseResponse(response.data.response);
//console.log('Ollama response:', parsedResponse);
if(parsedResponse.tags.length === 0 && parsedResponse.correspondent === null) {
console.warn('No tags or correspondent found in response from Ollama for Document.\nPlease review your prompt or switch to OpenAI for better results.',);
}

// Match the OpenAI service response structure
return {
document: parsedResponse,
metrics: {
promptTokens: 0, // Ollama doesn't provide token metrics
completionTokens: 0,
totalTokens: 0
},
truncated: false
};

} catch (error) {
console.error('Error analyzing document with Ollama:', error);
return {
document: { tags: [], correspondent: null },
metrics: null,
"title": "xxxxx",
"correspondent": "xxxxxxxx",
"tags": ["Tag1", "Tag2", "Tag3", "Tag4"],
"document_date": "YYYY-MM-DD",
"language": "en/de/es/..."
}
ALWAYS USE THE INFORMATION TO FILL OUT THE JSON OBJECT. DO NOT ASK BACK QUESTIONS.
`,
stream: false,
options: {
temperature: 0.7,
top_p: 0.9,
repeat_penalty: 1.1,
top_k: 7,
num_predict: 256,
num_ctx: numCtx
}
// options: {
// temperature: 0.3, // Moderately low for balance between consistency and creativity
// top_p: 0.7, // More reasonable value to allow sufficient token diversity
// repeat_penalty: 1.1, // Return to original value as 1.2 might be too restrictive
// top_k: 40, // Increased from 10 to allow more token options
// num_predict: 512, // Reduced from 1024 to a more stable value
// num_ctx: 2048 // Reduced context window for more stable processing
// }
});

if (!response.data || !response.data.response) {
throw new Error('Invalid response from Ollama API');
}

const parsedResponse = this._parseResponse(response.data.response);
//console.log('Ollama response:', parsedResponse);
if(parsedResponse.tags.length === 0 && parsedResponse.correspondent === null) {
console.warn('No tags or correspondent found in response from Ollama for Document.\nPlease review your prompt or switch to OpenAI for better results.',);
}

await this.writePromptToFile(prompt + "\n\n" + JSON.stringify(parsedResponse));
// Match the OpenAI service response structure
return {
document: parsedResponse,
metrics: {
promptTokens: 0, // Ollama doesn't provide token metrics
completionTokens: 0,
totalTokens: 0
},
truncated: false
};

} catch (error) {
console.error('Error analyzing document with Ollama:', error);
return {
document: { tags: [], correspondent: null },
metrics: null,
error: error.message
};
}
}


async writePromptToFile(systemPrompt) {
const filePath = './logs/prompt.txt';
const maxSize = 10 * 1024 * 1024;
@@ -127,6 +158,37 @@ class OllamaService {

async analyzePlayground(content, prompt) {
try {

const getAvailableMemory = async () => {
const totalMemory = os.totalmem();
const freeMemory = os.freemem();
const totalMemoryMB = (totalMemory / (1024 * 1024)).toFixed(0);
const freeMemoryMB = (freeMemory / (1024 * 1024)).toFixed(0);
return { totalMemoryMB, freeMemoryMB };
};

const calculateNumCtx = (promptTokenCount, expectedResponseTokens) => {
const totalTokenUsage = promptTokenCount + expectedResponseTokens;
const maxCtxLimit = 128000;

const numCtx = Math.min(totalTokenUsage, maxCtxLimit);

console.log('Prompt Token Count:', promptTokenCount);
console.log('Expected Response Tokens:', expectedResponseTokens);
console.log('Dynamic calculated num_ctx:', numCtx);

return numCtx;
};

const calculatePromptTokenCount = (prompt) => {
return Math.ceil(prompt.length / 4);
};

const { freeMemoryMB } = await getAvailableMemory();
const expectedResponseTokens = 1024;
const promptTokenCount = calculatePromptTokenCount(prompt);

const numCtx = calculateNumCtx(promptTokenCount, expectedResponseTokens);

const response = await this.client.post(`${this.apiUrl}/api/generate`, {
model: this.model,
@@ -151,7 +213,7 @@ class OllamaService {
repeat_penalty: 1.1,
top_k: 7,
num_predict: 256,
num_ctx: 100000
num_ctx: numCtx
}
// options: {
// temperature: 0.3, // Moderately low for balance between consistency and creativity
@@ -207,7 +269,7 @@ class OllamaService {
promptTags = process.env.PROMPT_TAGS;
systemPrompt = config.specialPromptPreDefinedTags;
} else {
systemPrompt = process.env.SYSTEM_PROMPT;
systemPrompt = process.env.SYSTEM_PROMPT + '\n\n' + config.mustHavePrompt;
}

// Format existing tags
4 changes: 2 additions & 2 deletions services/openaiService.js
@@ -107,10 +107,10 @@ class OpenAIService {
systemPrompt = `
Prexisting tags: ${existingTagsList}\n\n
Prexisiting correspondent: ${existingCorrespondentList}\n\n
` + process.env.SYSTEM_PROMPT;
` + process.env.SYSTEM_PROMPT + '\n\n' + config.mustHavePrompt;
promptTags = '';
} else {
systemPrompt = process.env.SYSTEM_PROMPT;
systemPrompt = process.env.SYSTEM_PROMPT + '\n\n' + config.mustHavePrompt;
promptTags = '';
}
if (process.env.USE_PROMPT_TAGS === 'yes') {
2 changes: 1 addition & 1 deletion services/setupService.js
@@ -131,7 +131,7 @@ class SetupService {
const envContent = Object.entries(config)
.map(([key, value]) => {
if (key === "SYSTEM_PROMPT") {
return `${key}=\`${value}\n${JSON_STANDARD_PROMPT}\``;
return `${key}=\`${value}\n\``;
}
return `${key}=${value}`;
})
