Review notes (text before the first "diff --git" header is ignored by git apply / patch).

Fixes applied to the added lines of this patch:
  * routes/setup.js, server.js (scanDocuments): the skip flag lives at
    config.ollama.skipValidation (see config/config.js below);
    config.OLLAMA_SKIP_VALIDATION is undefined on that module, so the old
    guard could never honour OLLAMA_SKIP_VALIDATION=true.
  * server.js (scanInitial): the guard was inverted (=== 'true'); validation
    must run unless the flag is 'true'.
  * routes/setup.js (playground): a failed validation now answers with
    HTTP 503 instead of returning without a response (hung request).

Open question for the author: analyzePlayground now sends `prompt` without
JSON.stringify(content); confirm the document content still reaches the model.

NOTE: line breaks and indentation were reconstructed from a
whitespace-collapsed copy. Whitespace-only -/+ pairs are approximations;
regenerate the patch against the real tree before applying.

diff --git a/config/config.js b/config/config.js
index ae64754..89c300b 100644
--- a/config/config.js
+++ b/config/config.js
@@ -23,7 +23,8 @@ module.exports = {
   },
   ollama: {
     apiUrl: process.env.OLLAMA_API_URL || 'http://localhost:11434',
-    model: process.env.OLLAMA_MODEL || 'llama2'
+    model: process.env.OLLAMA_MODEL || 'llama2',
+    skipValidation: process.env.OLLAMA_SKIP_VALIDATION || 'false'
   },
   aiProvider: process.env.AI_PROVIDER || 'openai',
   scanInterval: process.env.SCAN_INTERVAL || '*/30 * * * *',
diff --git a/routes/setup.js b/routes/setup.js
index a912eaf..cd78191 100644
--- a/routes/setup.js
+++ b/routes/setup.js
@@ -16,7 +16,7 @@ const bcrypt = require('bcryptjs');
 const cookieParser = require('cookie-parser');
 const { authenticateJWT, isAuthenticated } = require('./auth.js');
 const JWT_SECRET = process.env.JWT_SECRET || 'your-secret-key';
-
+const config = require('../config/config.js');
 // API endpoints that should not redirect
 const API_ENDPOINTS = ['/health'];
 
@@ -415,7 +415,8 @@ router.get('/setup', async (req, res) => {
       PROMPT_TAGS: normalizeArray(process.env.PROMPT_TAGS),
       PAPERLESS_AI_VERSION: configFile.PAPERLESS_AI_VERSION || ' ',
       PROCESS_ONLY_NEW_DOCUMENTS: process.env.PROCESS_ONLY_NEW_DOCUMENTS || 'yes',
-      USE_EXISTING_DATA: process.env.USE_EXISTING_DATA || 'no'
+      USE_EXISTING_DATA: process.env.USE_EXISTING_DATA || 'no',
+      OLLAMA_SKIP_VALIDATION: process.env.OLLAMA_SKIP_VALIDATION || 'false'
     };
 
     // Check both configuration and users
@@ -696,6 +697,18 @@ router.post('/manual/playground', express.json(), async (req, res) => {
       )
       return res.json(analyzeDocument);
     } else if (process.env.AI_PROVIDER === 'ollama') {
+      // Probe Ollama first unless OLLAMA_SKIP_VALIDATION is 'true'.
+      if (config.ollama.skipValidation !== 'true') {
+        const ollamaValid = await setupService.validateOllamaConfig(
+          config.ollama.apiUrl,
+          config.ollama.model
+        );
+        if (!ollamaValid) {
+          console.error('Ollama server is not running or the configuration is invalid. Skipping playground analysis.');
+          return res.status(503).json({ error: 'Ollama server is not running or the configuration is invalid.' });
+        }
+      }
+
       const analyzeDocument = await ollamaService.analyzePlayground(content, prompt);
       return res.json(analyzeDocument);
     } else {
diff --git a/server.js b/server.js
index 6c0bde4..044f8c7 100644
--- a/server.js
+++ b/server.js
@@ -92,7 +92,7 @@ async function processDocument(doc, existingTags, existingCorrespondentList, own
 
   const aiService = AIServiceFactory.getService();
   const analysis = await aiService.analyzeDocument(content, existingTags, existingCorrespondentList, doc.id);
-  
+
   if (analysis.error) {
     throw new Error(`[ERROR] Document analysis failed: ${analysis.error}`);
   }
@@ -132,13 +132,13 @@ async function buildUpdateData(analysis, doc) {
 
 async function saveDocumentChanges(docId, updateData, analysis, originalData) {
   const { tags: originalTags, correspondent: originalCorrespondent, title: originalTitle } = originalData;
-  
+
   await Promise.all([
     documentModel.saveOriginalData(docId, originalTags, originalCorrespondent, originalTitle),
     paperlessService.updateDocument(docId, updateData),
     documentModel.addProcessedDocument(docId, updateData.title),
     documentModel.addOpenAIMetrics(
-      docId, 
+      docId,
       analysis.metrics.promptTokens,
       analysis.metrics.completionTokens,
       analysis.metrics.totalTokens
@@ -150,6 +150,18 @@ async function saveDocumentChanges(docId, updateData, analysis, originalData) {
 // Main scanning functions
 async function scanInitial() {
   try {
+    // Probe Ollama first unless OLLAMA_SKIP_VALIDATION is 'true'.
+    if (config.aiProvider === 'ollama' && config.ollama.skipValidation !== 'true') {
+      const ollamaValid = await setupService.validateOllamaConfig(
+        config.ollama.apiUrl,
+        config.ollama.model
+      );
+      if (!ollamaValid) {
+        console.error('Ollama server is not running or the configuration is invalid. Skipping document scan.');
+        return;
+      }
+    }
+
     const isConfigured = await setupService.isConfigured();
     if (!isConfigured) {
       console.log('[ERROR] Setup not completed. Skipping document scan.');
@@ -187,6 +199,16 @@ async function scanDocuments() {
     console.log('[DEBUG] Task already running');
     return;
   }
+  if (config.aiProvider === 'ollama' && config.ollama.skipValidation !== 'true') {
+    const ollamaValid = await setupService.validateOllamaConfig(
+      config.ollama.apiUrl,
+      config.ollama.model
+    );
+    if (!ollamaValid) {
+      console.error('Ollama server is not running or the configuration is invalid. Skipping document scan.');
+      return;
+    }
+  }
 
   runningTask = true;
   try {
@@ -236,7 +258,7 @@ app.get('/health', async (req, res) => {
   try {
     const isConfigured = await setupService.isConfigured();
     if (!isConfigured) {
-      return res.status(503).json({ 
+      return res.status(503).json({
         status: 'not_configured',
         message: 'Application setup not completed'
       });
@@ -246,9 +268,9 @@ app.get('/health', async (req, res) => {
     res.json({ status: 'healthy' });
   } catch (error) {
     console.error('Health check failed:', error);
-    res.status(503).json({ 
-      status: 'error', 
-      message: error.message 
+    res.status(503).json({
+      status: 'error',
+      message: error.message
     });
   }
 });
@@ -281,7 +303,7 @@ async function startScanning() {
     cron.schedule(config.scanInterval, async () => {
       console.log(`Starting scheduled scan at ${new Date().toISOString()}`);
       await scanDocuments();
-    });
+    }, { runOnInit: false });
   } catch (error) {
     console.error('[ERROR] in startScanning:', error);
   }
diff --git a/services/ollamaService.js b/services/ollamaService.js
index 637ad49..36f2953 100644
--- a/services/ollamaService.js
+++ b/services/ollamaService.js
@@ -26,11 +26,11 @@ class OllamaService {
                 await fs.access(cachePath);
                 console.log('[DEBUG] Thumbnail already cached');
             } catch (err) {
-                console.log('Thumbnail not cached, fetching from Paperless');  
+                console.log('Thumbnail not cached, fetching from Paperless');
                 const thumbnailData = await paperlessService.getThumbnailImage(id);
-              if (!thumbnailData) {
-                console.warn('Thumbnail nicht gefunden');
-              }
+                if (!thumbnailData) {
+                    console.warn('Thumbnail nicht gefunden');
+                }
                 await fs.mkdir(path.dirname(cachePath), { recursive: true });
                 await fs.writeFile(cachePath, thumbnailData);
             }
@@ -137,24 +137,25 @@ class OllamaService {
     async writePromptToFile(systemPrompt) {
         const filePath = './logs/prompt.txt';
         const maxSize = 10 * 1024 * 1024;
-      
+
         try {
-          const stats = await fs.stat(filePath);
-          if (stats.size > maxSize) {
-            await fs.unlink(filePath); // Delete the file if is biger 10MB
-          }
+            const stats = await fs.stat(filePath);
+            if (stats.size > maxSize) {
+                await fs.unlink(filePath); // Delete the file if it is bigger than 10MB
+            }
         } catch (error) {
-          if (error.code !== 'ENOENT') {
-            console.warn('[WARNING] Error checking file size:', error);
-          }
+            if (error.code !== 'ENOENT') {
+                console.warn('[WARNING] Error checking file size:', error);
+            }
         }
-      
+
         try {
-          await fs.appendFile(filePath, '================================================================================' + systemPrompt + '\n\n' + '================================================================================\n\n');
+            await fs.appendFile(filePath, '================================================================================' + systemPrompt + '\n\n' + '================================================================================\n\n');
         } catch (error) {
-          console.error('[ERROR] Error writing to file:', error);
+            console.error('[ERROR] Error writing to file:', error);
         }
-      }
+    }
+
 
     async analyzePlayground(content, prompt) {
         try {
@@ -192,7 +193,7 @@ class OllamaService {
 
             const response = await this.client.post(`${this.apiUrl}/api/generate`, {
                 model: this.model,
-                prompt: prompt + "\n\n" + JSON.stringify(content),
+                prompt: prompt,
                 system: `
                 You are a document analyzer. Your task is to analyze documents and extract relevant information. You do not ask back questions.
                 YOU MUSTNOT: Ask for additional information or clarification, or ask questions about the document, or ask for additional context.
@@ -215,14 +216,14 @@ class OllamaService {
                     num_predict: 256,
                     num_ctx: numCtx
                 }
-                  // options: {
-                  //     temperature: 0.3,        // Moderately low for balance between consistency and creativity
-                  //     top_p: 0.7,             // More reasonable value to allow sufficient token diversity
-                  //     repeat_penalty: 1.1,    // Return to original value as 1.2 might be too restrictive
-                  //     top_k: 40,              // Increased from 10 to allow more token options
-                  //     num_predict: 512,       // Reduced from 1024 to a more stable value
-                  //     num_ctx: 2048           // Reduced context window for more stable processing
-                  // }
+                // options: {
+                //     temperature: 0.3,        // Moderately low for balance between consistency and creativity
+                //     top_p: 0.7,             // More reasonable value to allow sufficient token diversity
+                //     repeat_penalty: 1.1,    // Return to original value as 1.2 might be too restrictive
+                //     top_k: 40,              // Increased from 10 to allow more token options
+                //     num_predict: 512,       // Reduced from 1024 to a more stable value
+                //     num_ctx: 2048           // Reduced context window for more stable processing
+                // }
             });
 
             if (!response.data || !response.data.response) {
@@ -231,7 +232,7 @@ class OllamaService {
 
             const parsedResponse = this._parseResponse(response.data.response);
             //console.log('Ollama response:', parsedResponse);
-            if(parsedResponse.tags.length === 0 && parsedResponse.correspondent === null) {
+            if (parsedResponse.tags.length === 0 && parsedResponse.correspondent === null) {
                 console.warn('No tags or correspondent found in response from Ollama for Document.\nPlease review your prompt or switch to OpenAI for better results.',);
             }
 
@@ -259,19 +260,19 @@ class OllamaService {
     _buildPrompt(content, existingTags = [], existingCorrespondent = []) {
         let systemPrompt;
         let promptTags = '';
-    
+
         // Validate that existingCorrespondent is an array and handle if it's not
-        const correspondentList = Array.isArray(existingCorrespondent) 
-            ? existingCorrespondent 
+        const correspondentList = Array.isArray(existingCorrespondent)
+            ? existingCorrespondent
             : [];
-    
+
         if (process.env.USE_PROMPT_TAGS === 'yes') {
             promptTags = process.env.PROMPT_TAGS;
             systemPrompt = config.specialPromptPreDefinedTags;
         } else {
             systemPrompt = process.env.SYSTEM_PROMPT + '\n\n' + config.mustHavePrompt;
         }
-    
+
         // Format existing tags
         const existingTagsList = Array.isArray(existingTags)
             ? existingTags
@@ -279,7 +280,7 @@ class OllamaService {
                 .map(tag => tag.name)
                 .join(', ')
             : '';
-    
+
         // Format existing correspondents - handle both array of objects and array of strings
         const existingCorrespondentList = correspondentList
             .filter(Boolean)  // Remove any null/undefined entries
@@ -289,15 +290,15 @@ class OllamaService {
             })
             .filter(name => name.length > 0)  // Remove empty strings
             .join(', ');
-    
-        if(process.env.USE_EXISTING_DATA === 'yes') {
+
+        if (process.env.USE_EXISTING_DATA === 'yes') {
             return `${systemPrompt}
             Existing tags: ${existingTagsList}\n
             Existing Correspondents: ${existingCorrespondentList}\n
             ${JSON.stringify(content)}
 
             `;
-        }else {
+        } else {
             return `${systemPrompt}
             ${JSON.stringify(content)}
             `;
@@ -305,61 +306,61 @@ class OllamaService {
     }
 
     _parseResponse(response) {
-      try {
-        // Find JSON in response using regex
-        const jsonMatch = response.match(/\{[\s\S]*\}/);
-        if (!jsonMatch) {
-          //console.warn('No JSON found in response:', response);
-          return { tags: [], correspondent: null };
-        }
-  
-        let jsonStr = jsonMatch[0];
-        console.log('Extracted JSON String:', jsonStr);
-  
-        try {
-          // Attempt to parse the JSON
-          const result = JSON.parse(jsonStr);
-  
-          // Validate and return the result
-          return {
-            tags: Array.isArray(result.tags) ? result.tags : [],
-            correspondent: result.correspondent || null,
-            title: result.title || null,
-            document_date: result.document_date || null,
-            language: result.language || null
-          };
-  
-        } catch (errorx) {
-          console.warn('Error parsing JSON from response:', errorx.message);
-          console.warn('Attempting to sanitize the JSON...');
-  
-          // Optionally sanitize the JSON here
-          jsonStr = jsonStr
-            .replace(/,\s*}/g, '}') // Remove trailing commas before closing braces
-            .replace(/,\s*]/g, ']') // Remove trailing commas before closing brackets
-            .replace(/(['"])?([a-zA-Z0-9_]+)(['"])?\s*:/g, '"$2":'); // Ensure property names are quoted
-  
-          try {
-            const sanitizedResult = JSON.parse(jsonStr);
-            return {
-              tags: Array.isArray(sanitizedResult.tags) ? sanitizedResult.tags : [],
-              correspondent: sanitizedResult.correspondent || null,
-              title: sanitizedResult.title || null,
-              document_date: sanitizedResult.document_date || null,
-              language: sanitizedResult.language || null
-            };
-          } catch (finalError) {
-            console.error('Final JSON parsing failed after sanitization.\nThis happens when the JSON structure is too complex or invalid.\nThat indicates an issue with the generated JSON string by Ollama.\nSwitch to OpenAI for better results or fine tune your prompt.');
-            //console.error('Sanitized JSON String:', jsonStr);
-            return { tags: [], correspondent: null };
-          }
-        }
-      } catch (error) {
-        console.error('Error parsing Ollama response:', error.message);
-        console.error('Raw response:', response);
-        return { tags: [], correspondent: null };
-      }
-  }
+        try {
+            // Find JSON in response using regex
+            const jsonMatch = response.match(/\{[\s\S]*\}/);
+            if (!jsonMatch) {
+                //console.warn('No JSON found in response:', response);
+                return { tags: [], correspondent: null };
+            }
+
+            let jsonStr = jsonMatch[0];
+            console.log('Extracted JSON String:', jsonStr);
+
+            try {
+                // Attempt to parse the JSON
+                const result = JSON.parse(jsonStr);
+
+                // Validate and return the result
+                return {
+                    tags: Array.isArray(result.tags) ? result.tags : [],
+                    correspondent: result.correspondent || null,
+                    title: result.title || null,
+                    document_date: result.document_date || null,
+                    language: result.language || null
+                };
+
+            } catch (errorx) {
+                console.warn('Error parsing JSON from response:', errorx.message);
+                console.warn('Attempting to sanitize the JSON...');
+
+                // Optionally sanitize the JSON here
+                jsonStr = jsonStr
+                    .replace(/,\s*}/g, '}') // Remove trailing commas before closing braces
+                    .replace(/,\s*]/g, ']') // Remove trailing commas before closing brackets
+                    .replace(/(['"])?([a-zA-Z0-9_]+)(['"])?\s*:/g, '"$2":'); // Ensure property names are quoted
+
+                try {
+                    const sanitizedResult = JSON.parse(jsonStr);
+                    return {
+                        tags: Array.isArray(sanitizedResult.tags) ? sanitizedResult.tags : [],
+                        correspondent: sanitizedResult.correspondent || null,
+                        title: sanitizedResult.title || null,
+                        document_date: sanitizedResult.document_date || null,
+                        language: sanitizedResult.language || null
+                    };
+                } catch (finalError) {
+                    console.error('Final JSON parsing failed after sanitization.\nThis happens when the JSON structure is too complex or invalid.\nThat indicates an issue with the generated JSON string by Ollama.\nSwitch to OpenAI for better results or fine tune your prompt.');
+                    //console.error('Sanitized JSON String:', jsonStr);
+                    return { tags: [], correspondent: null };
+                }
+            }
+        } catch (error) {
+            console.error('Error parsing Ollama response:', error.message);
+            console.error('Raw response:', response);
+            return { tags: [], correspondent: null };
+        }
+    }
 }
 
 module.exports = new OllamaService();
\ No newline at end of file
diff --git a/services/setupService.js b/services/setupService.js
index 492422f..c6a1757 100644
--- a/services/setupService.js
+++ b/services/setupService.js
@@ -57,7 +57,7 @@ class SetupService {
         console.error('OpenAI validation error:', error.message);
         return false;
       }
-    }else{
+    } else {
       return true;
     }
   }
@@ -67,8 +67,11 @@ class SetupService {
       const response = await axios.post(`${url}/api/generate`, {
         model: model || 'llama2',
         prompt: 'Test',
-        stream: false
-      });
+        stream: false,
+      },
+        {
+          timeout: 10000
+        });
       return response.data && response.data.response;
     } catch (error) {
       console.error('Ollama validation error:', error.message);
@@ -82,26 +85,28 @@ class SetupService {
         config.PAPERLESS_API_URL,
         config.PAPERLESS_API_TOKEN
       );
-      
+
       if (!paperlessValid) {
         throw new Error('Invalid Paperless configuration');
       }
 
       // Validate AI provider config
       const aiProvider = config.AI_PROVIDER || 'openai';
-      
+
       if (aiProvider === 'openai') {
         const openaiValid = await this.validateOpenAIConfig(config.OPENAI_API_KEY);
         if (!openaiValid) {
           throw new Error('Invalid OpenAI configuration');
         }
       } else if (aiProvider === 'ollama') {
-        const ollamaValid = await this.validateOllamaConfig(
-          config.OLLAMA_API_URL || 'http://localhost:11434',
-          config.OLLAMA_MODEL
-        );
-        if (!ollamaValid) {
-          throw new Error('Invalid Ollama configuration');
+        if (config.OLLAMA_SKIP_VALIDATION !== 'true') {
+          const ollamaValid = await this.validateOllamaConfig(
+            config.OLLAMA_API_URL || 'http://localhost:11434',
+            config.OLLAMA_MODEL
+          );
+          if (!ollamaValid) {
+            throw new Error('Invalid Ollama configuration');
+          }
         }
       }
 
@@ -138,7 +143,7 @@ class SetupService {
         .join('\n');
 
       await fs.writeFile(this.envPath, envContent);
-      
+
       // Reload environment variables
       Object.entries(config).forEach(([key, value]) => {
         process.env[key] = value;