forked from AliMamed/node-pdf-text
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
36 lines (29 loc) · 812 Bytes
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
var _ = require('lodash')
var Parser = require('pdf2json')
// Silence PDF parsing log output by replacing `_logN` on the shared `util`
// module object with a no-op.
// NOTE(review): presumably pdf2json installs its logger as `util._logN` when it
// is required, which is why this override must stay after the
// `require('pdf2json')` line above — confirm against the installed pdf2json version.
require('util')._logN = function() { }
//given a path to a pdf
//turn it into a json structure
module.exports = function(path, cb) {
var parser = new Parser()
parser.on('pdfParser_dataReady', function(result) {
var text = []
//get text on a particular page
result.formImage.Pages.forEach(function(page) {
var chunks = _(page.Texts).map('R').flatten().map('T').map(decodeURIComponent).value()
text = text.concat(chunks)
})
parser.destroy()
setImmediate(function() {
cb(null, text)
})
})
parser.on('pdfParser_dataError', function(err) {
parser.destroy()
cb(err)
})
if(path instanceof Buffer) {
return parser.parseBuffer(path)
}
parser.loadPDF(path)
}