-
Notifications
You must be signed in to change notification settings - Fork 9
/
index.js
100 lines (75 loc) · 2.12 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
'use strict'
const cheerio = require('cheerio')
/**
 * Converts every `<table>` found in an HTML string into an array of row
 * objects keyed by column header.
 *
 * All parsing happens eagerly in the constructor; the getters only expose the
 * state collected there.
 */
class HtmlTableToJson {
  /**
   * @param {string} html - Markup to scan for tables.
   * @param {Object} [options] - Optional settings; `null` is treated as `{}`.
   * @param {boolean} [options.values] - When exactly `true`, `results` yields
   *   each row as an array of cell values instead of an object.
   * @throws {TypeError} If `html` is not a string.
   */
  constructor(html, options = {}) {
    if (typeof html !== 'string') {
      throw new TypeError('html input must be a string')
    }

    this.html = html
    // Default parameters only cover `undefined`; an explicit `null` would
    // otherwise make the `results` getter throw on `this.opts.values`.
    this.opts = options == null ? {} : options
    this._$ = cheerio.load(this.html)
    this._results = []
    this._headers = []
    this._count = null
    this._firstRowUsedAsHeaders = []
    // Row object -> its property names in column order (see _rowValues).
    this._columnKeys = new WeakMap()
    this._process()
  }

  /**
   * Convenience factory equivalent to `new HtmlTableToJson(html, options)`.
   *
   * @param {string} html
   * @param {Object} [options]
   * @returns {HtmlTableToJson}
   */
  static parse(html, options) {
    return new HtmlTableToJson(html, options)
  }

  /**
   * Number of `<table>` elements matched in the document, computed on first
   * access and cached afterwards.
   *
   * @returns {number}
   */
  get count() {
    if (Number.isInteger(this._count) === false) {
      this._count = this._$('table').get().length
    }
    return this._count
  }

  /**
   * Parsed rows, one array per table.
   *
   * Each row is an object keyed by header text (or by 1-based column number
   * when the header is missing or empty). With `options.values === true` each
   * row is instead an array of its values in column order.
   *
   * @returns {Array<Array<Object|string[]>>}
   */
  get results() {
    if (this.opts.values !== true) {
      return this._results
    }
    return this._results.map(rows => rows.map(row => this._rowValues(row)))
  }

  /**
   * Header texts collected for each table, indexed by table position.
   *
   * @returns {string[][]}
   */
  get headers() {
    return this._headers
  }

  /**
   * Walks every table once and fills `_results` / `_headers`. Returns early
   * when results have already been collected.
   *
   * @returns {Array<Array<Object>>}
   */
  _process() {
    if (this._results.length > 0) {
      return this._results
    }

    this._$('table').each((tableIndex, table) => {
      this._processTable(tableIndex, table)
    })

    return this._results
  }

  /**
   * Resolves the headers of one table, converts each of its `tr` elements and
   * finally drops the rows that produced no cells.
   */
  _processTable(tableIndex, table) {
    this._results[tableIndex] = []
    this._buildHeaders(tableIndex, table)

    this._$(table).find('tr').each((rowIndex, row) => {
      this._processRow(tableIndex, rowIndex, row)
    })

    this._pruneEmptyRows(tableIndex)
  }

  /**
   * Converts the `td` cells of one row into an object stored at
   * `_results[tableIndex][index]`. The first row is skipped when it was
   * consumed as the header row.
   */
  _processRow(tableIndex, index, row) {
    if (index === 0 && this._firstRowUsedAsHeaders[tableIndex] === true) {
      return
    }

    const headers = this._headers[tableIndex]
    const record = {}
    // A Set keeps first-insertion order and collapses repeated header names,
    // mirroring how the repeated assignment below collapses them on `record`.
    const keys = new Set()
    this._results[tableIndex][index] = record

    this._$(row).find('td').each((i, cell) => {
      // Missing or empty header text falls back to the 1-based column number.
      const key = headers[i] || (i + 1)
      record[key] = this._$(cell).text().trim()
      keys.add(String(key))
    })

    this._columnKeys.set(record, Array.from(keys))
  }

  /**
   * Collects header texts for one table: from `th` cells when any exist
   * (positioned by their index within their own row), otherwise from the
   * `td` cells of the first row, which is then flagged as consumed.
   */
  _buildHeaders(index, table) {
    const headers = []
    this._headers[index] = headers

    this._$(table).find('tr').each((i, row) => {
      this._$(row).find('th').each((j, cell) => {
        headers[j] = this._$(cell).text().trim()
      })
    })

    if (headers.length > 0) {
      return
    }

    this._firstRowUsedAsHeaders[index] = true
    this._$(table).find('tr').first().find('td').each((j, cell) => {
      headers[j] = this._$(cell).text().trim()
    })
  }

  /**
   * Removes rows without any own property (e.g. header-only rows).
   */
  _pruneEmptyRows(tableIndex) {
    this._results[tableIndex] = this._results[tableIndex]
      .filter(row => Object.keys(row).length)
  }

  /**
   * Returns the values of a row object in column order.
   *
   * `Object.values` cannot be used here: JavaScript lists integer-like
   * property names ("2019", the numeric fallback keys) before all others in
   * ascending order, which would shuffle the columns.
   */
  _rowValues(row) {
    const keys = this._columnKeys.get(row) || Object.keys(row)
    return keys
      // Skip names that never became own properties (e.g. "__proto__") or
      // that were removed from the row after parsing.
      .filter(key => Object.prototype.hasOwnProperty.call(row, key))
      .map(key => row[key])
  }
}
// CommonJS export: the class itself is the module's export value.
module.exports = HtmlTableToJson