Skip to content

Commit

Permalink
Merge pull request #4 from Libertai/al/uploading-within-chat
Browse files Browse the repository at this point in the history
feat: upload file within chat and updated knowledge db
  • Loading branch information
amiller68 authored Apr 15, 2024
2 parents 98ad034 + cdaad60 commit 7d2d58a
Show file tree
Hide file tree
Showing 9 changed files with 249 additions and 263 deletions.
8 changes: 4 additions & 4 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"build": "quasar build"
},
"dependencies": {
"@libertai/libertai-js": "^0.0.0",
"@libertai/libertai-js": "0.0.3",
"@quasar/extras": "^1.16.4",
"@solana/web3.js": "^1.90.0",
"aleph-sdk-ts": "^3.9.2",
Expand Down
2 changes: 1 addition & 1 deletion quasar.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ module.exports = configure(function (/* ctx */) {
},

vueRouterMode: 'hash', // available values: 'hash', 'history'
extendViteConf(viteConf, { isServer, isClient }) {
extendViteConf(viteConf, { _isServer, _isClient }) {
if (viteConf.build.commonjsOptions == undefined) {
viteConf.build.commonjsOptions = {};
}
Expand Down
234 changes: 105 additions & 129 deletions src/components/KnowledgeStoreUploader.js
Original file line number Diff line number Diff line change
@@ -1,107 +1,64 @@
import { createUploaderComponent } from 'quasar';
import { computed, ref } from 'vue';
import { useKnowledgeStore } from '../stores/knowledge-store';
// import * as pdfjsLib from 'pdfjs-dist/webpack';
// console.log(pdfjsLib)
const pdfjsLib = window.pdfjsLib;

/* pdf upload example code:
async extractTextFromPdf(pdfUrl) {
console.log("called")
// set loading to true before processing the PDF
this.loading = true;
const pdf = await window.pdfjsLib.getDocument(pdfUrl).promise;
console.log(pdf)
const maxPages = pdf.numPages;
let textContent = [];
for (let i = 1; i <= maxPages; i++) {
const page = await pdf.getPage(i);
const content = await page.getTextContent();
const pageTextContent = content.items.map(item => item.str).join('');
textContent.push(pageTextContent);
}
console.log("textContent")
// set loading to false when the PDF processing is complete
this.loading = false;
return textContent.join('\n');
},
async handleFileUpload(event) {
console.log(event)
const file = event.target.files[0];
const extension = file.name.split('.').pop();
if (extension !== 'pdf') {
alert('Please select a PDF file');
return;
}
const reader = new FileReader();
reader.onload = async () => {
const dataUrl = reader.result;
// set loading to true before extracting text from the PDF
this.loading = true;
const text = await this.extractTextFromPdf(dataUrl);
this.extractedText = text;
this.localValue.context_document = text;
// set loading to false when the text extraction is complete
this.loading = false;
};

reader.readAsDataURL(file);
},
// State
import { useKnowledgeStore } from 'src/stores/knowledge-store';

*/
// Get PDF.js from the window object
const pdfjsLib = window.pdfjsLib;

// export a Vue component
export default createUploaderComponent({
name: 'KnowledgeStoreUploader',
props: {
// ...your custom props
},
emits: [
// ...your custom events name list
],
injectPlugin({ props, emit, helpers }) {
props: {},
emits: [],
injectPlugin({ _props, _emit, helpers }) {
const loading = ref(false);

async function extractTextFromPdf(pdfUrl) {
console.log('called');
// set loading to true before processing the PDF
// Map of file objects to their status as either 'queued', 'uploading', 'embedding', 'uploaded', or 'failed'
const fileStatus = ref({});

// Upload Logic
async function upload(_args) {
// Set the loading state
loading.value = true;
console.log(pdfUrl);
let pdf;
try {
pdf = await pdfjsLib.getDocument(pdfUrl).promise;
} catch (error) {
console.log(error);
}
console.log(pdf);
const maxPages = pdf.numPages;
let textContent = [];
const files = helpers.queuedFiles.value;
console.log(`components::KnowledgeStoreUploader::upload - files: ${files}`);
// Start every queued file in the 'queued' state, keyed by file name.
// Built from entries rather than `reduce` without an initial value: that form
// used the first File object as the accumulator (so the map *was* that File and
// its own status was never recorded) and threw a TypeError on an empty queue.
fileStatus.value = Object.fromEntries(files.map((file) => [file.name, 'queued']));

// Load our state
const knowledgeStore = useKnowledgeStore();

for (let i = 1; i <= maxPages; i++) {
const page = await pdf.getPage(i);
const content = await page.getTextContent();
const pageTextContent = content.items.map((item) => item.str).join('\n\n---\n\n');
textContent.push(pageTextContent);
let uploads = [];
// TODO: workers would be preferred here
// Handle Each File in Sequence
for (let file of files) {
let result = async () => {
try {
fileStatus.value[file.name] = 'uploading';
helpers.updateFileStatus(file, 'uploading');
let { title, text } = await processFile(file);
fileStatus.value[file.name] = 'embedding';
helpers.updateFileStatus(file, 'embedding');
await knowledgeStore.addDocument(title, text);
fileStatus.value[file.name] = 'uploaded';
helpers.updateFileStatus(file, 'uploaded');
} catch (error) {
console.error(error);
fileStatus.value[file.name] = 'failed';
helpers.updateFileStatus(file, 'failed');
console.error(`components::KnowledgeStoreUploader::upload - error: ${error}`);
}
};
uploads.push(result());
}
console.log(textContent);

// set loading to false when the PDF processing is complete
// Resolve all uploads
await Promise.all(uploads);
// Reset the loading state
loading.value = false;

return textContent.join('\n');
}

const isUploading = computed(() => {
Expand All @@ -115,51 +72,70 @@ export default createUploaderComponent({
});

// Abort handler returned to the uploader: resets the per-file status map to an empty object.
// NOTE(review): the body only clears `fileStatus`; nothing here cancels work already started — confirm that is intended.
function abort() {
// Drop every tracked file status.
fileStatus.value = {};
}

async function upload(args) {
console.log(args);
console.log(props, emit, helpers);
const files = helpers.queuedFiles.value;
console.log(files);

const knowledgeStore = useKnowledgeStore();
/**
* Extract title and text content from a file.
* Supports PDF and plain text files.
* @param {File} file - The file to process.
* @returns {Promise<{ title: string; text: string }>} - The extracted title and text content.
*/
async function processFile(file) {
const title = file.name;
let extractedText = '';

// now for each file, handle it.
for (let file of files) {
console.log(file);
if (file.type === 'application/pdf') {
const url = URL.createObjectURL(file);
const text = await extractTextFromPdf(url);
const title = file.name;
await knowledgeStore.addDocument(title, text);
helpers.updateFileStatus(file, 'uploaded');
} else if (file.type === 'text/plain') {
const reader = new FileReader();
reader.onload = async (event) => {
const content = event.target.result;
const title = file.name;
await knowledgeStore.addDocument(title, content);
helpers.updateFileStatus(file, 'uploaded');
};
reader.readAsText(file);
} else {
helpers.updateFileStatus(file, 'failed');
try {
switch (file.type) {
case 'application/pdf':
extractedText = await extractTextFromPdfFile(file);
break;
case 'text/plain':
extractedText = await new Promise((resolve, reject) => {
const reader = new FileReader();
reader.onload = (event) => resolve(event.target.result);
reader.onerror = (error) => reject(error);
reader.readAsText(file);
});
break;
default:
throw new Error(`Unsupported file type: ${file.type}`);
}
} catch (error) {
console.error('Error processing file:', error);
throw error;
}

// const file = props.files[0]
// if (file.type === 'application/pdf') {
// const reader = new FileReader()
// reader.onload = async (event) => {
// const content = event.target.result
// const title = file.name
// await useKnowledge.addDocument(title, content)
// }
// reader.readAsText(file)
// }
return { title, text: extractedText };
}

/**
 * Extract the text content of a PDF file using PDF.js.
 *
 * The file is handed to PDF.js through a temporary object URL. That URL is
 * always revoked once extraction finishes (successfully or not) so the
 * underlying blob is not kept alive for the lifetime of the page.
 *
 * @param {File} file - The PDF file to read.
 * @returns {Promise<string>} The text of every page; text items within a page
 *   are joined with a space and pages are separated by a newline.
 * @throws {Error} If PDF.js cannot open the document. The original failure is
 *   attached as `cause`.
 */
async function extractTextFromPdfFile(file) {
  const pdfUrl = URL.createObjectURL(file);

  try {
    let pdf;
    try {
      pdf = await pdfjsLib.getDocument(pdfUrl).promise;
    } catch (error) {
      console.error(`components::KnowledgeStoreUploader::extractTextFromPdfFile - error: ${error}`);
      throw new Error('Failed to extract text from PDF', { cause: error });
    }

    const pageTexts = [];
    // PDF.js pages are 1-indexed.
    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
      const page = await pdf.getPage(pageNumber);
      const content = await page.getTextContent();
      pageTexts.push(content.items.map((item) => item.str).join(' '));
    }

    // Separate pages with a newline so the last word of one page is not fused
    // with the first word of the next.
    return pageTexts.join('\n');
  } finally {
    // Release the blob reference created above, even when extraction failed.
    URL.revokeObjectURL(pdfUrl);
  }
}

return {
isUploading,
isBusy,
Expand Down
2 changes: 1 addition & 1 deletion src/components/MarkdownRenderer.vue
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ const marked = new Marked(
},
markedHighlight({
langPrefix: 'hljs language-',
highlight(code, lang, info) {
highlight(code, lang, _info) {
const language = hljs.getLanguage(lang) ? lang : 'plaintext';
return hljs.highlight(code, { language }).value;
},
Expand Down
5 changes: 0 additions & 5 deletions src/components/MessageInput.vue
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,4 @@ const sendMessage = (event) => {
let content = model.value;
emit('sendMessage', content);
};
// return {
// model,
// isLoading,
// sendMessage
// }
</script>
Loading

0 comments on commit 7d2d58a

Please sign in to comment.