Skip to content

Commit

Permalink
updated getText api to parse text from pdf and txt file
Browse files Browse the repository at this point in the history
  • Loading branch information
bhatiadheeraj committed Jul 29, 2024
1 parent 33888d9 commit 6290dae
Showing 1 changed file with 28 additions and 16 deletions.
44 changes: 28 additions & 16 deletions api/controllers/ezgov.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ const mongoose = require('mongoose');
const path = require('path');
const mkdirp = require('mkdirp');
const fs = require('fs');
const fsPromises = fs.promises;
const pdf = require('pdf-parse');
const mammoth = require("mammoth");

/**
Expand Down Expand Up @@ -348,40 +350,50 @@ router.get('/project/:projectId/file/:docId', async (req, res) => {
});




/**
 * GET /project/:projectId/file/:docId/getText
 *
 * Responds with the plain text extracted from a document stored on a project.
 * The extraction strategy is chosen from the extension of `document.fileUrl`:
 *   - `.docx` -> `mammoth.extractRawText` (the `value` of its result)
 *   - `.pdf`  -> `pdf-parse` on the file's bytes (the `text` of its result)
 *   - `.txt`  -> the file content read as UTF-8
 *
 * Responses:
 *   200 - extracted text
 *   400 - the file extension is not one of the supported types
 *   404 - project, document, or file on disk not found
 *   500 - any unexpected failure (database lookup, file read, parser error)
 */
router.get('/project/:projectId/file/:docId/getText', async (req, res) => {
    const { projectId, docId } = req.params;

    try {
        const project = await db.ezGovProjects.findById(projectId);
        if (!project) {
            return res.status(404).send('Project not found');
        }

        const document = project.documents.id(docId);
        if (!document) {
            return res.status(404).send('Document not found');
        }

        const filePath = document.fileUrl;
        const fileExtension = path.extname(filePath).toLowerCase();

        // Check that the file exists. This uses the promise API rather than the
        // callback form of fs.access so that everything awaited below stays
        // inside this try/catch; a rejection thrown from an async callback
        // would be unhandled and the request would never receive a response.
        try {
            await fsPromises.access(filePath, fs.constants.F_OK);
        } catch (accessErr) {
            return res.status(404).send('File not found');
        }

        let text = '';
        if (fileExtension === '.docx') {
            const result = await mammoth.extractRawText({ path: filePath });
            text = result.value;
        } else if (fileExtension === '.pdf') {
            const dataBuffer = await fsPromises.readFile(filePath);
            const result = await pdf(dataBuffer);
            text = result.text;
        } else if (fileExtension === '.txt') {
            text = await fsPromises.readFile(filePath, 'utf8');
        } else {
            return res.status(400).send('Unsupported file type. Only .docx, .pdf, and .txt files are supported for text extraction.');
        }

        return res.status(200).send(text);
    } catch (err) {
        console.error('Internal server error:', err);
        return res.status(500).send('Internal server error');
    }
});



// Export the router object that the routes in this file are registered on.
module.exports = router;

0 comments on commit 6290dae

Please sign in to comment.