From 36b31d888b1cb343e25a5c3eba5a0bb22fe816c8 Mon Sep 17 00:00:00 2001
From: Sebastian Mahr <sebastian.mahr83@gmail.com>
Date: Tue, 14 Jan 2025 13:19:17 +0100
Subject: [PATCH] chore: add option for the Ollama server to not be online all
 the time

---
 config/config.js          |   3 +-
 routes/setup.js           |  17 ++-
 server.js                 |  38 +++++--
 services/ollamaService.js | 227 +++++++++++++++++++-------------------
 services/setupService.js  |  29 +++--
 5 files changed, 178 insertions(+), 136 deletions(-)

diff --git a/config/config.js b/config/config.js
index d3ae9c4..b888429 100644
--- a/config/config.js
+++ b/config/config.js
@@ -23,7 +23,8 @@ module.exports = {
   },
   ollama: {
     apiUrl: process.env.OLLAMA_API_URL || 'http://localhost:11434',
-    model: process.env.OLLAMA_MODEL || 'llama2'
+    model: process.env.OLLAMA_MODEL || 'llama2',
+    skipValidation: process.env.OLLAMA_SKIP_VALIDATION || "false"
   },
   aiProvider: process.env.AI_PROVIDER || 'openai',
   scanInterval: process.env.SCAN_INTERVAL || '*/30 * * * *',
diff --git a/routes/setup.js b/routes/setup.js
index a278f32..3ce5aac 100644
--- a/routes/setup.js
+++ b/routes/setup.js
@@ -16,7 +16,7 @@ const bcrypt = require('bcryptjs');
 const cookieParser = require('cookie-parser');
 const { authenticateJWT, isAuthenticated } = require('./auth.js');
 const JWT_SECRET = process.env.JWT_SECRET || 'your-secret-key';
-
+const config = require("../config/config.js");
 
 // API endpoints that should not redirect
 const API_ENDPOINTS = ['/health'];
@@ -377,7 +377,8 @@ router.get('/setup', async (req, res) => {
       PROMPT_TAGS: normalizeArray(process.env.PROMPT_TAGS),
       PAPERLESS_AI_VERSION: configFile.PAPERLESS_AI_VERSION || ' ',
       PROCESS_ONLY_NEW_DOCUMENTS: process.env.PROCESS_ONLY_NEW_DOCUMENTS || 'yes',
-      USE_EXISTING_DATA: process.env.USE_EXISTING_DATA || 'no'
+      USE_EXISTING_DATA: process.env.USE_EXISTING_DATA || 'no',
+      OLLAMA_SKIP_VALIDATION: process.env.OLLAMA_SKIP_VALIDATION || "false"
     };
 
     // Check both configuration and users
@@ -658,6 +659,18 @@ router.post('/manual/playground', express.json(), async (req, res) => {
       )
       return res.json(analyzeDocument);
     } else if (process.env.AI_PROVIDER === 'ollama') {
+
+      if (config.ollama.skipValidation !== 'true') { // flag is exposed as config.ollama.skipValidation (config/config.js)
+        const ollamaValid = await setupService.validateOllamaConfig(
+          config.ollama.apiUrl,
+          config.ollama.model
+        );
+        if (!ollamaValid) {
+          console.error('Ollama server is not running or the configuration is invalid. Skipping playground analysis.');
+          return res.status(503).json({ error: 'Ollama server is not running or the configuration is invalid.' }); // always answer, otherwise the request hangs
+        }
+      }
+
       const analyzeDocument = await ollamaService.analyzePlayground(content, prompt);
       return res.json(analyzeDocument);
     } else {
diff --git a/server.js b/server.js
index 6c0bde4..044f8c7 100644
--- a/server.js
+++ b/server.js
@@ -92,7 +92,7 @@ async function processDocument(doc, existingTags, existingCorrespondentList, own
 
   const aiService = AIServiceFactory.getService();
   const analysis = await aiService.analyzeDocument(content, existingTags, existingCorrespondentList, doc.id);
-  
+
   if (analysis.error) {
     throw new Error(`[ERROR] Document analysis failed: ${analysis.error}`);
   }
@@ -132,13 +132,13 @@ async function buildUpdateData(analysis, doc) {
 
 async function saveDocumentChanges(docId, updateData, analysis, originalData) {
   const { tags: originalTags, correspondent: originalCorrespondent, title: originalTitle } = originalData;
-  
+
   await Promise.all([
     documentModel.saveOriginalData(docId, originalTags, originalCorrespondent, originalTitle),
     paperlessService.updateDocument(docId, updateData),
     documentModel.addProcessedDocument(docId, updateData.title),
     documentModel.addOpenAIMetrics(
-      docId, 
+      docId,
       analysis.metrics.promptTokens,
       analysis.metrics.completionTokens,
       analysis.metrics.totalTokens
@@ -150,6 +150,18 @@ async function saveDocumentChanges(docId, updateData, analysis, originalData) {
 // Main scanning functions
 async function scanInitial() {
   try {
+
+    if (config.aiProvider === 'ollama' && config.ollama.skipValidation !== 'true') { // probe Ollama unless validation is explicitly skipped
+      const ollamaValid = await setupService.validateOllamaConfig(
+        config.ollama.apiUrl,
+        config.ollama.model
+      );
+      if (!ollamaValid) {
+        console.error('Ollama server is not running or the configuration is invalid. Skipping document scan.');
+        return;
+      }
+    }
+
     const isConfigured = await setupService.isConfigured();
     if (!isConfigured) {
       console.log('[ERROR] Setup not completed. Skipping document scan.');
@@ -187,6 +199,16 @@ async function scanDocuments() {
     console.log('[DEBUG] Task already running');
     return;
   }
+  if (config.aiProvider === 'ollama' && config.ollama.skipValidation !== 'true') { // flag is exposed as config.ollama.skipValidation (config/config.js)
+    const ollamaValid = await setupService.validateOllamaConfig(
+      config.ollama.apiUrl,
+      config.ollama.model
+    );
+    if (!ollamaValid) {
+      console.error('Ollama server is not running or the configuration is invalid. Skipping document scan.');
+      return;
+    }
+  }
 
   runningTask = true;
   try {
@@ -236,7 +258,7 @@ app.get('/health', async (req, res) => {
   try {
     const isConfigured = await setupService.isConfigured();
     if (!isConfigured) {
-      return res.status(503).json({ 
+      return res.status(503).json({
         status: 'not_configured',
         message: 'Application setup not completed'
       });
@@ -246,9 +268,9 @@ app.get('/health', async (req, res) => {
     res.json({ status: 'healthy' });
   } catch (error) {
     console.error('Health check failed:', error);
-    res.status(503).json({ 
-      status: 'error', 
-      message: error.message 
+    res.status(503).json({
+      status: 'error',
+      message: error.message
     });
   }
 });
@@ -281,7 +303,7 @@ async function startScanning() {
     cron.schedule(config.scanInterval, async () => {
       console.log(`Starting scheduled scan at ${new Date().toISOString()}`);
       await scanDocuments();
-    });
+    }, { runOnInit: false });
   } catch (error) {
     console.error('[ERROR] in startScanning:', error);
   }
diff --git a/services/ollamaService.js b/services/ollamaService.js
index a0c3ce9..b71a371 100644
--- a/services/ollamaService.js
+++ b/services/ollamaService.js
@@ -25,17 +25,17 @@ class OllamaService {
                 await fs.access(cachePath);
                 console.log('[DEBUG] Thumbnail already cached');
             } catch (err) {
-                console.log('Thumbnail not cached, fetching from Paperless');  
+                console.log('Thumbnail not cached, fetching from Paperless');
                 const thumbnailData = await paperlessService.getThumbnailImage(id);
-            if (!thumbnailData) {
-                console.warn('Thumbnail nicht gefunden');
-            }
+                if (!thumbnailData) {
+                    console.warn('Thumbnail nicht gefunden');
+                }
                 await fs.mkdir(path.dirname(cachePath), { recursive: true });
                 await fs.writeFile(cachePath, thumbnailData);
             }
 
             await this.writePromptToFile(prompt);
-            
+
             const response = await this.client.post(`${this.apiUrl}/api/generate`, {
                 model: this.model,
                 prompt: prompt,
@@ -54,21 +54,21 @@ class OllamaService {
                 `,
                 stream: false,
                 options: {
-                  temperature: 0.7, 
-                  top_p: 0.9,
-                  repeat_penalty: 1.1,
-                  top_k: 7,
-                  num_predict: 256,
-                  num_ctx: 100000
+                    temperature: 0.7,
+                    top_p: 0.9,
+                    repeat_penalty: 1.1,
+                    top_k: 7,
+                    num_predict: 256,
+                    num_ctx: Math.min(100000, Math.max(2048, prompt.length + 256)) // clamp: leave room for the system text + num_predict, never exceed the previous 100000 ceiling
                 }
-              //   options: {
-              //     temperature: 0.3,        // Moderately low for balance between consistency and creativity
-              //     top_p: 0.7,             // More reasonable value to allow sufficient token diversity
-              //     repeat_penalty: 1.1,     // Return to original value as 1.2 might be too restrictive
-              //     top_k: 40,              // Increased from 10 to allow more token options
-              //     num_predict: 512,        // Reduced from 1024 to a more stable value
-              //     num_ctx: 2048           // Reduced context window for more stable processing
-              // }
+                //   options: {
+                //     temperature: 0.3,        // Moderately low for balance between consistency and creativity
+                //     top_p: 0.7,             // More reasonable value to allow sufficient token diversity
+                //     repeat_penalty: 1.1,     // Return to original value as 1.2 might be too restrictive
+                //     top_k: 40,              // Increased from 10 to allow more token options
+                //     num_predict: 512,        // Reduced from 1024 to a more stable value
+                //     num_ctx: 2048           // Reduced context window for more stable processing
+                // }
             });
 
             if (!response.data || !response.data.response) {
@@ -77,7 +77,7 @@ class OllamaService {
 
             const parsedResponse = this._parseResponse(response.data.response);
             //console.log('Ollama response:', parsedResponse);
-            if(parsedResponse.tags.length === 0 && parsedResponse.correspondent === null) {
+            if (parsedResponse.tags.length === 0 && parsedResponse.correspondent === null) {
                 console.warn('No tags or correspondent found in response from Ollama for Document.\nPlease review your prompt or switch to OpenAI for better results.',);
             }
 
@@ -106,31 +106,32 @@ class OllamaService {
     async writePromptToFile(systemPrompt) {
         const filePath = './logs/prompt.txt';
         const maxSize = 10 * 1024 * 1024;
-      
+
         try {
-          const stats = await fs.stat(filePath);
-          if (stats.size > maxSize) {
-            await fs.unlink(filePath); // Delete the file if is biger 10MB
-          }
+            const stats = await fs.stat(filePath);
+            if (stats.size > maxSize) {
+                await fs.unlink(filePath); // Delete the file if it is bigger than 10 MB
+            }
         } catch (error) {
-          if (error.code !== 'ENOENT') {
-            console.warn('[WARNING] Error checking file size:', error);
-          }
+            if (error.code !== 'ENOENT') {
+                console.warn('[WARNING] Error checking file size:', error);
+            }
         }
-      
+
         try {
-          await fs.appendFile(filePath, '================================================================================' + systemPrompt + '\n\n' + '================================================================================\n\n');
+            await fs.appendFile(filePath, '================================================================================' + systemPrompt + '\n\n' + '================================================================================\n\n');
         } catch (error) {
-          console.error('[ERROR] Error writing to file:', error);
+            console.error('[ERROR] Error writing to file:', error);
         }
-      }
+    }
+
 
     async analyzePlayground(content, prompt) {
         try {
-          
+            prompt = prompt + "\n\n" + JSON.stringify(content)
             const response = await this.client.post(`${this.apiUrl}/api/generate`, {
                 model: this.model,
-                prompt: prompt + "\n\n" + JSON.stringify(content),
+                prompt: prompt,
                 system: `
                 You are a document analyzer. Your task is to analyze documents and extract relevant information. You do not ask back questions. 
                 YOU MUSTNOT: Ask for additional information or clarification, or ask questions about the document, or ask for additional context.
@@ -146,21 +147,21 @@ class OllamaService {
                 `,
                 stream: false,
                 options: {
-                  temperature: 0.7, 
-                  top_p: 0.9,
-                  repeat_penalty: 1.1,
-                  top_k: 7,
-                  num_predict: 256,
-                  num_ctx: 100000
+                    temperature: 0.7,
+                    top_p: 0.9,
+                    repeat_penalty: 1.1,
+                    top_k: 7,
+                    num_predict: 256,
+                    num_ctx: Math.min(100000, Math.max(2048, prompt.length + 256)) // clamp: leave room for the system text + num_predict, never exceed the previous 100000 ceiling
                 }
-              //   options: {
-              //     temperature: 0.3,        // Moderately low for balance between consistency and creativity
-              //     top_p: 0.7,             // More reasonable value to allow sufficient token diversity
-              //     repeat_penalty: 1.1,     // Return to original value as 1.2 might be too restrictive
-              //     top_k: 40,              // Increased from 10 to allow more token options
-              //     num_predict: 512,        // Reduced from 1024 to a more stable value
-              //     num_ctx: 2048           // Reduced context window for more stable processing
-              // }
+                //   options: {
+                //     temperature: 0.3,        // Moderately low for balance between consistency and creativity
+                //     top_p: 0.7,             // More reasonable value to allow sufficient token diversity
+                //     repeat_penalty: 1.1,     // Return to original value as 1.2 might be too restrictive
+                //     top_k: 40,              // Increased from 10 to allow more token options
+                //     num_predict: 512,        // Reduced from 1024 to a more stable value
+                //     num_ctx: 2048           // Reduced context window for more stable processing
+                // }
             });
 
             if (!response.data || !response.data.response) {
@@ -169,7 +170,7 @@ class OllamaService {
 
             const parsedResponse = this._parseResponse(response.data.response);
             //console.log('Ollama response:', parsedResponse);
-            if(parsedResponse.tags.length === 0 && parsedResponse.correspondent === null) {
+            if (parsedResponse.tags.length === 0 && parsedResponse.correspondent === null) {
                 console.warn('No tags or correspondent found in response from Ollama for Document.\nPlease review your prompt or switch to OpenAI for better results.',);
             }
 
@@ -197,19 +198,19 @@ class OllamaService {
     _buildPrompt(content, existingTags = [], existingCorrespondent = []) {
         let systemPrompt;
         let promptTags = '';
-    
+
         // Validate that existingCorrespondent is an array and handle if it's not
-        const correspondentList = Array.isArray(existingCorrespondent) 
-            ? existingCorrespondent 
+        const correspondentList = Array.isArray(existingCorrespondent)
+            ? existingCorrespondent
             : [];
-    
+
         if (process.env.USE_PROMPT_TAGS === 'yes') {
             promptTags = process.env.PROMPT_TAGS;
             systemPrompt = config.specialPromptPreDefinedTags;
         } else {
             systemPrompt = process.env.SYSTEM_PROMPT;
         }
-    
+
         // Format existing tags
         const existingTagsList = Array.isArray(existingTags)
             ? existingTags
@@ -217,7 +218,7 @@ class OllamaService {
                 .map(tag => tag.name)
                 .join(', ')
             : '';
-    
+
         // Format existing correspondents - handle both array of objects and array of strings
         const existingCorrespondentList = correspondentList
             .filter(Boolean)  // Remove any null/undefined entries
@@ -227,15 +228,15 @@ class OllamaService {
             })
             .filter(name => name.length > 0)  // Remove empty strings
             .join(', ');
-    
-        if(process.env.USE_EXISTING_DATA === 'yes') {
+
+        if (process.env.USE_EXISTING_DATA === 'yes') {
             return `${systemPrompt}
             Existing tags: ${existingTagsList}\n
             Existing Correspondents: ${existingCorrespondentList}\n
             ${JSON.stringify(content)}
             
             `;
-        }else {
+        } else {
             return `${systemPrompt}
             ${JSON.stringify(content)}
             `;
@@ -243,61 +244,61 @@ class OllamaService {
     }
 
     _parseResponse(response) {
-      try {
-          // Find JSON in response using regex
-          const jsonMatch = response.match(/\{[\s\S]*\}/);
-          if (!jsonMatch) {
-              //console.warn('No JSON found in response:', response);
-              return { tags: [], correspondent: null };
-          }
-  
-          let jsonStr = jsonMatch[0];
-          console.log('Extracted JSON String:', jsonStr);
-  
-          try {
-              // Attempt to parse the JSON
-              const result = JSON.parse(jsonStr);
-  
-              // Validate and return the result
-              return {
-                  tags: Array.isArray(result.tags) ? result.tags : [],
-                  correspondent: result.correspondent || null,
-                  title: result.title || null,
-                  document_date: result.document_date || null,
-                  language: result.language || null
-              };
-  
-          } catch (errorx) {
-              console.warn('Error parsing JSON from response:', errorx.message);
-              console.warn('Attempting to sanitize the JSON...');
-  
-              // Optionally sanitize the JSON here
-              jsonStr = jsonStr
-                  .replace(/,\s*}/g, '}') // Remove trailing commas before closing braces
-                  .replace(/,\s*]/g, ']') // Remove trailing commas before closing brackets
-                  .replace(/(['"])?([a-zA-Z0-9_]+)(['"])?\s*:/g, '"$2":'); // Ensure property names are quoted
-  
-              try {
-                  const sanitizedResult = JSON.parse(jsonStr);
-                  return {
-                      tags: Array.isArray(sanitizedResult.tags) ? sanitizedResult.tags : [],
-                      correspondent: sanitizedResult.correspondent || null,
-                      title: sanitizedResult.title || null,
-                      document_date: sanitizedResult.document_date || null,
-                      language: sanitizedResult.language || null
-                  };
-              } catch (finalError) {
-                  console.error('Final JSON parsing failed after sanitization.\nThis happens when the JSON structure is too complex or invalid.\nThat indicates an issue with the generated JSON string by Ollama.\nSwitch to OpenAI for better results or fine tune your prompt.');
-                  //console.error('Sanitized JSON String:', jsonStr);
-                  return { tags: [], correspondent: null };
-              }
-          }
-      } catch (error) {
-          console.error('Error parsing Ollama response:', error.message);
-          console.error('Raw response:', response);
-          return { tags: [], correspondent: null };
-      }
-  }
+        try {
+            // Find JSON in response using regex
+            const jsonMatch = response.match(/\{[\s\S]*\}/);
+            if (!jsonMatch) {
+                //console.warn('No JSON found in response:', response);
+                return { tags: [], correspondent: null };
+            }
+
+            let jsonStr = jsonMatch[0];
+            console.log('Extracted JSON String:', jsonStr);
+
+            try {
+                // Attempt to parse the JSON
+                const result = JSON.parse(jsonStr);
+
+                // Validate and return the result
+                return {
+                    tags: Array.isArray(result.tags) ? result.tags : [],
+                    correspondent: result.correspondent || null,
+                    title: result.title || null,
+                    document_date: result.document_date || null,
+                    language: result.language || null
+                };
+
+            } catch (errorx) {
+                console.warn('Error parsing JSON from response:', errorx.message);
+                console.warn('Attempting to sanitize the JSON...');
+
+                // Optionally sanitize the JSON here
+                jsonStr = jsonStr
+                    .replace(/,\s*}/g, '}') // Remove trailing commas before closing braces
+                    .replace(/,\s*]/g, ']') // Remove trailing commas before closing brackets
+                    .replace(/(['"])?([a-zA-Z0-9_]+)(['"])?\s*:/g, '"$2":'); // Ensure property names are quoted
+
+                try {
+                    const sanitizedResult = JSON.parse(jsonStr);
+                    return {
+                        tags: Array.isArray(sanitizedResult.tags) ? sanitizedResult.tags : [],
+                        correspondent: sanitizedResult.correspondent || null,
+                        title: sanitizedResult.title || null,
+                        document_date: sanitizedResult.document_date || null,
+                        language: sanitizedResult.language || null
+                    };
+                } catch (finalError) {
+                    console.error('Final JSON parsing failed after sanitization.\nThis happens when the JSON structure is too complex or invalid.\nThat indicates an issue with the generated JSON string by Ollama.\nSwitch to OpenAI for better results or fine tune your prompt.');
+                    //console.error('Sanitized JSON String:', jsonStr);
+                    return { tags: [], correspondent: null };
+                }
+            }
+        } catch (error) {
+            console.error('Error parsing Ollama response:', error.message);
+            console.error('Raw response:', response);
+            return { tags: [], correspondent: null };
+        }
+    }
 }
 
 module.exports = new OllamaService();
\ No newline at end of file
diff --git a/services/setupService.js b/services/setupService.js
index 26f1f47..cf413bd 100644
--- a/services/setupService.js
+++ b/services/setupService.js
@@ -57,7 +57,7 @@ class SetupService {
         console.error('OpenAI validation error:', error.message);
         return false;
       }
-    }else{
+    } else {
       return true;
     }
   }
@@ -67,8 +67,11 @@ class SetupService {
       const response = await axios.post(`${url}/api/generate`, {
         model: model || 'llama2',
         prompt: 'Test',
-        stream: false
-      });
+        stream: false,
+      },
+        {
+          timeout: 10000
+        });
       return response.data && response.data.response;
     } catch (error) {
       console.error('Ollama validation error:', error.message);
@@ -82,26 +85,28 @@ class SetupService {
       config.PAPERLESS_API_URL,
       config.PAPERLESS_API_TOKEN
     );
-    
+
     if (!paperlessValid) {
       throw new Error('Invalid Paperless configuration');
     }
 
     // Validate AI provider config
     const aiProvider = config.AI_PROVIDER || 'openai';
-    
+
     if (aiProvider === 'openai') {
       const openaiValid = await this.validateOpenAIConfig(config.OPENAI_API_KEY);
       if (!openaiValid) {
         throw new Error('Invalid OpenAI configuration');
       }
     } else if (aiProvider === 'ollama') {
-      const ollamaValid = await this.validateOllamaConfig(
-        config.OLLAMA_API_URL || 'http://localhost:11434',
-        config.OLLAMA_MODEL
-      );
-      if (!ollamaValid) {
-        throw new Error('Invalid Ollama configuration');
+      if (config.OLLAMA_SKIP_VALIDATION !== 'true') {
+        const ollamaValid = await this.validateOllamaConfig(
+          config.OLLAMA_API_URL || 'http://localhost:11434',
+          config.OLLAMA_MODEL
+        );
+        if (!ollamaValid) {
+          throw new Error('Invalid Ollama configuration');
+        }
       }
     }
 
@@ -138,7 +143,7 @@ class SetupService {
         .join('\n');
 
       await fs.writeFile(this.envPath, envContent);
-      
+
       // Reload environment variables
       Object.entries(config).forEach(([key, value]) => {
         process.env[key] = value;