Addressed: #157 (comment)
Addressed: #142

Regarding the Ollama service and context size, I implemented a dynamic way of setting the num_ctx parameter. Until now there was a fixed "maximum" context size of 100,000 tokens. That's huge, and most systems won't handle it.
The new approach checks how many tokens the prompt needs (estimated for Q4_0 quantization), adds a 1,024-token buffer for the response (more than enough, in my view, for the JSON response), and passes the calculated num_ctx to the Ollama API call.
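A minimal sketch of the calculation this commit introduces (the logic mirrors the calculatePromptTokenCount and calculateNumCtx helpers in the diff below; the prompt length is just an example value):

    // Estimate prompt tokens (~4 characters per token), add a response buffer, cap at 128k
    const prompt = 'x'.repeat(20000);                        // example: a 20,000-character prompt
    const promptTokenCount = Math.ceil(prompt.length / 4);   // => 5000 tokens
    const expectedResponseTokens = 1024;                     // buffer for the JSON response
    const numCtx = Math.min(promptTokenCount + expectedResponseTokens, 128000);
    console.log(numCtx);                                     // 6024 instead of a fixed 100,000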
clusterzx committed Jan 16, 2025
1 parent 7eb4989 commit 34c25d6
Showing 5 changed files with 165 additions and 63 deletions.
10 changes: 9 additions & 1 deletion config/config.js
@@ -11,7 +11,7 @@ console.log('Loaded environment variables:', {
});

module.exports = {
PAPERLESS_AI_VERSION: '2.1.9',
PAPERLESS_AI_VERSION: '2.2.0',
CONFIGURED: false,
predefinedMode: process.env.PROCESS_PREDEFINED_DOCUMENTS,
paperless: {
@@ -42,4 +42,12 @@ module.exports = {
"document_date": "YYYY-MM-DD",
"language": "en/de/es/..."
}`,
mustHavePrompt: ` Return the result EXCLUSIVELY as a JSON object. The Tags and Title MUST be in the language that is used in the document.:
{
"title": "xxxxx",
"correspondent": "xxxxxxxx",
"tags": ["Tag1", "Tag2", "Tag3", "Tag4"],
"document_date": "YYYY-MM-DD",
"language": "en/de/es/..."
}`,
};
34 changes: 33 additions & 1 deletion services/manualService.js
@@ -68,6 +68,37 @@ class ManualService {
async _analyzeOllama(content, existingTags) {
try {
const prompt = process.env.SYSTEM_PROMPT;

const getAvailableMemory = async () => {
const totalMemory = os.totalmem();
const freeMemory = os.freemem();
const totalMemoryMB = (totalMemory / (1024 * 1024)).toFixed(0);
const freeMemoryMB = (freeMemory / (1024 * 1024)).toFixed(0);
return { totalMemoryMB, freeMemoryMB };
};

const calculateNumCtx = (promptTokenCount, expectedResponseTokens) => {
const totalTokenUsage = promptTokenCount + expectedResponseTokens;
const maxCtxLimit = 128000;

const numCtx = Math.min(totalTokenUsage, maxCtxLimit);

console.log('Prompt Token Count:', promptTokenCount);
console.log('Expected Response Tokens:', expectedResponseTokens);
console.log('Dynamic calculated num_ctx:', numCtx);

return numCtx;
};

const calculatePromptTokenCount = (prompt) => {
return Math.ceil(prompt.length / 4);
};

const { freeMemoryMB } = await getAvailableMemory();
const expectedResponseTokens = 1024;
const promptTokenCount = calculatePromptTokenCount(prompt);

const numCtx = calculateNumCtx(promptTokenCount, expectedResponseTokens);

const response = await this.ollama.post(`${config.ollama.apiUrl}/api/generate`, {
model: config.ollama.model,
@@ -76,7 +107,8 @@ class ManualService {
options: {
temperature: 0.7,
top_p: 0.9,
repeat_penalty: 1.1
repeat_penalty: 1.1,
num_ctx: numCtx,
}
});

178 changes: 120 additions & 58 deletions services/ollamaService.js
@@ -3,6 +3,7 @@ const config = require('../config/config');
const fs = require('fs').promises;
const path = require('path');
const paperlessService = require('./paperlessService');
const os = require('os');

class OllamaService {
constructor() {
@@ -34,8 +35,37 @@ class OllamaService {
await fs.writeFile(cachePath, thumbnailData);
}

await this.writePromptToFile(prompt);

const getAvailableMemory = async () => {
const totalMemory = os.totalmem();
const freeMemory = os.freemem();
const totalMemoryMB = (totalMemory / (1024 * 1024)).toFixed(0);
const freeMemoryMB = (freeMemory / (1024 * 1024)).toFixed(0);
return { totalMemoryMB, freeMemoryMB };
};

const calculateNumCtx = (promptTokenCount, expectedResponseTokens) => {
const totalTokenUsage = promptTokenCount + expectedResponseTokens;
const maxCtxLimit = 128000;

const numCtx = Math.min(totalTokenUsage, maxCtxLimit);

console.log('Prompt Token Count:', promptTokenCount);
console.log('Expected Response Tokens:', expectedResponseTokens);
console.log('Dynamic calculated num_ctx:', numCtx);

return numCtx;
};

const calculatePromptTokenCount = (prompt) => {
return Math.ceil(prompt.length / 4);
};

const { freeMemoryMB } = await getAvailableMemory();
const expectedResponseTokens = 1024;
const promptTokenCount = calculatePromptTokenCount(prompt);

const numCtx = calculateNumCtx(promptTokenCount, expectedResponseTokens);
const response = await this.client.post(`${this.apiUrl}/api/generate`, {
model: this.model,
prompt: prompt,
@@ -44,65 +74,66 @@ class OllamaService {
YOU MUSTNOT: Ask for additional information or clarification, or ask questions about the document, or ask for additional context.
YOU MUSTNOT: Return a response without the desired JSON format.
YOU MUST: Analyze the document content and extract the following information into this structured JSON format and only this format!: {
"title": "xxxxx",
"correspondent": "xxxxxxxx",
"tags": ["Tag1", "Tag2", "Tag3", "Tag4"],
"document_date": "YYYY-MM-DD",
"language": "en/de/es/..."
}
ALWAYS USE THE INFORMATION TO FILL OUT THE JSON OBJECT. DO NOT ASK BACK QUESTIONS.
`,
stream: false,
options: {
temperature: 0.7,
top_p: 0.9,
repeat_penalty: 1.1,
top_k: 7,
num_predict: 256,
num_ctx: 100000
}
// options: {
// temperature: 0.3, // Moderately low for balance between consistency and creativity
// top_p: 0.7, // More reasonable value to allow sufficient token diversity
// repeat_penalty: 1.1, // Return to original value as 1.2 might be too restrictive
// top_k: 40, // Increased from 10 to allow more token options
// num_predict: 512, // Reduced from 1024 to a more stable value
// num_ctx: 2048 // Reduced context window for more stable processing
// }
});

if (!response.data || !response.data.response) {
throw new Error('Invalid response from Ollama API');
}

const parsedResponse = this._parseResponse(response.data.response);
//console.log('Ollama response:', parsedResponse);
if(parsedResponse.tags.length === 0 && parsedResponse.correspondent === null) {
console.warn('No tags or correspondent found in response from Ollama for Document.\nPlease review your prompt or switch to OpenAI for better results.',);
}

// Match the OpenAI service response structure
return {
document: parsedResponse,
metrics: {
promptTokens: 0, // Ollama doesn't provide token metrics
completionTokens: 0,
totalTokens: 0
},
truncated: false
};

} catch (error) {
console.error('Error analyzing document with Ollama:', error);
return {
document: { tags: [], correspondent: null },
metrics: null,
"title": "xxxxx",
"correspondent": "xxxxxxxx",
"tags": ["Tag1", "Tag2", "Tag3", "Tag4"],
"document_date": "YYYY-MM-DD",
"language": "en/de/es/..."
}
ALWAYS USE THE INFORMATION TO FILL OUT THE JSON OBJECT. DO NOT ASK BACK QUESTIONS.
`,
stream: false,
options: {
temperature: 0.7,
top_p: 0.9,
repeat_penalty: 1.1,
top_k: 7,
num_predict: 256,
num_ctx: numCtx
}
// options: {
// temperature: 0.3, // Moderately low for balance between consistency and creativity
// top_p: 0.7, // More reasonable value to allow sufficient token diversity
// repeat_penalty: 1.1, // Return to original value as 1.2 might be too restrictive
// top_k: 40, // Increased from 10 to allow more token options
// num_predict: 512, // Reduced from 1024 to a more stable value
// num_ctx: 2048 // Reduced context window for more stable processing
// }
});

if (!response.data || !response.data.response) {
throw new Error('Invalid response from Ollama API');
}

const parsedResponse = this._parseResponse(response.data.response);
//console.log('Ollama response:', parsedResponse);
if(parsedResponse.tags.length === 0 && parsedResponse.correspondent === null) {
console.warn('No tags or correspondent found in response from Ollama for Document.\nPlease review your prompt or switch to OpenAI for better results.',);
}

await this.writePromptToFile(prompt + "\n\n" + JSON.stringify(parsedResponse));
// Match the OpenAI service response structure
return {
document: parsedResponse,
metrics: {
promptTokens: 0, // Ollama doesn't provide token metrics
completionTokens: 0,
totalTokens: 0
},
truncated: false
};

} catch (error) {
console.error('Error analyzing document with Ollama:', error);
return {
document: { tags: [], correspondent: null },
metrics: null,
error: error.message
};
}
}


async writePromptToFile(systemPrompt) {
const filePath = './logs/prompt.txt';
const maxSize = 10 * 1024 * 1024;
@@ -127,6 +158,37 @@ class OllamaService {

async analyzePlayground(content, prompt) {
try {

const getAvailableMemory = async () => {
const totalMemory = os.totalmem();
const freeMemory = os.freemem();
const totalMemoryMB = (totalMemory / (1024 * 1024)).toFixed(0);
const freeMemoryMB = (freeMemory / (1024 * 1024)).toFixed(0);
return { totalMemoryMB, freeMemoryMB };
};

const calculateNumCtx = (promptTokenCount, expectedResponseTokens) => {
const totalTokenUsage = promptTokenCount + expectedResponseTokens;
const maxCtxLimit = 128000;

const numCtx = Math.min(totalTokenUsage, maxCtxLimit);

console.log('Prompt Token Count:', promptTokenCount);
console.log('Expected Response Tokens:', expectedResponseTokens);
console.log('Dynamic calculated num_ctx:', numCtx);

return numCtx;
};

const calculatePromptTokenCount = (prompt) => {
return Math.ceil(prompt.length / 4);
};

const { freeMemoryMB } = await getAvailableMemory();
const expectedResponseTokens = 1024;
const promptTokenCount = calculatePromptTokenCount(prompt);

const numCtx = calculateNumCtx(promptTokenCount, expectedResponseTokens);

const response = await this.client.post(`${this.apiUrl}/api/generate`, {
model: this.model,
@@ -151,7 +213,7 @@ class OllamaService {
repeat_penalty: 1.1,
top_k: 7,
num_predict: 256,
num_ctx: 100000
num_ctx: numCtx
}
// options: {
// temperature: 0.3, // Moderately low for balance between consistency and creativity
@@ -207,7 +269,7 @@ class OllamaService {
promptTags = process.env.PROMPT_TAGS;
systemPrompt = config.specialPromptPreDefinedTags;
} else {
systemPrompt = process.env.SYSTEM_PROMPT;
systemPrompt = process.env.SYSTEM_PROMPT + '\n\n' + config.mustHavePrompt;
}

// Format existing tags
4 changes: 2 additions & 2 deletions services/openaiService.js
@@ -107,10 +107,10 @@ class OpenAIService {
systemPrompt = `
Prexisting tags: ${existingTagsList}\n\n
Prexisiting correspondent: ${existingCorrespondentList}\n\n
` + process.env.SYSTEM_PROMPT;
` + process.env.SYSTEM_PROMPT + '\n\n' + config.mustHavePrompt;
promptTags = '';
} else {
systemPrompt = process.env.SYSTEM_PROMPT;
systemPrompt = process.env.SYSTEM_PROMPT + '\n\n' + config.mustHavePrompt;
promptTags = '';
}
if (process.env.USE_PROMPT_TAGS === 'yes') {
2 changes: 1 addition & 1 deletion services/setupService.js
@@ -131,7 +131,7 @@ class SetupService {
const envContent = Object.entries(config)
.map(([key, value]) => {
if (key === "SYSTEM_PROMPT") {
return `${key}=\`${value}\n${JSON_STANDARD_PROMPT}\``;
return `${key}=\`${value}\n\``;
}
return `${key}=${value}`;
})
