-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
122 lines (105 loc) · 4.23 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
const fs = require('fs');
const puppeteer = require('puppeteer');
const argv = require('minimist')(process.argv.slice(2));
const readline = require('readline');
const A = require('async');
const sharp = require('sharp');
const path = require('path');
const phash = require('imghash').hash;
(async () => {
// ---- Configuration from command-line flags and environment ----

// --procs: how many pages are processed in parallel. Anything that does not
// parse to a positive integer (missing, NaN, 0, negative) falls back to 1.
const requestedProcs = Number.parseInt(argv.procs, 10);
const concurrency = requestedProcs > 0 ? requestedProcs : 1;

// --force: true whenever the flag is present at all, whatever its value.
const force = argv.force !== undefined;

// --base: directory that the per-line relative paths are joined onto.
const base = argv.base;

// HTTP basic-auth credentials come from the USER and PASS environment
// variables. If either is unset the literal text "undefined" ends up in the
// encoded header, because the template literal stringifies it.
const user = process.env.USER;
const pass = process.env.PASS;

// File extension used to derive the thumbnail name from the screenshot name.
const ext = 'jpg';

// Buffer.from replaces the deprecated `new Buffer(string)` constructor.
const auth = Buffer.from(`${user}:${pass}`).toString('base64');

// One shared headless browser; every task opens its own page in it.
const browser = await puppeteer.launch({
    ignoreHTTPSErrors: true,
    timeout: 40000
});
console.log(`Browser launched with concurrency ${concurrency} (force: ${force})`);
/**
 * Reads the HTTP status code from a Puppeteer navigation response.
 *
 * Puppeteer documents `status()` as a method; a plain numeric property is
 * accepted as well in case the installed release exposes it that way.
 * NOTE(review): confirm against the installed Puppeteer version.
 *
 * @param {object|null} response - value resolved by `page.goto`.
 * @returns {number|undefined} the status code, or undefined without a response.
 */
const statusOf = response => {
    if (!response) {
        return undefined;
    }
    return typeof response.status === 'function' ? response.status() : response.status;
};

/**
 * Renames `from` to `to` and logs the outcome.
 *
 * The returned promise always resolves: a failed rename is reported on
 * stderr but does not abort the rest of the task.
 *
 * @param {string} from - existing file path.
 * @param {string} to - destination file path.
 * @returns {Promise<void>}
 */
const renameAndLog = (from, to) => new Promise(resolve => {
    fs.rename(from, to, e => {
        if (e) {
            console.error(`Cannot rename ${from} -> ${to}: ${e}`);
        } else {
            console.log(`Done ${from} -> ${to}`);
        }
        resolve();
    });
});

/**
 * Processes one task end to end: full-page screenshot of `task.url` saved to
 * `task.path`, 128x128 thumbnail next to it, then both files renamed so that
 * their basename is prefixed with the thumbnail's perceptual hash.
 *
 * Never rejects: every failure is logged, and the page (if one was opened)
 * is always closed.
 *
 * @param {{path: string, url: string}} task
 * @returns {Promise<void>}
 */
const captureTask = async task => {
    let page;
    try {
        // Opened inside the try so a failure here is logged instead of
        // leaving the queue waiting forever for this task's callback.
        page = await browser.newPage();

        const thumb = task.path.replace(`.${ext}`, `.thumb.${ext}`);
        await page.setExtraHTTPHeaders({
            'Authorization': `Basic ${auth}`
        });

        const response = await page.goto(task.url, { waitUntil: 'load' });
        const status = statusOf(response);
        if (status !== 200) {
            throw new Error(`Cannot load page because status is ${status}`);
        }

        await page.screenshot({
            fullPage: true,
            path: task.path
        });

        // Each step is awaited so the task only counts as finished once the
        // files exist, and so failures land in the catch below.
        await sharp(task.path)
            .resize(128, 128)
            .crop('north')
            .toFile(thumb);
        console.log(`Created ${thumb}`);

        const hash = await phash(thumb);
        const thumbTo = path.join(path.dirname(thumb), `${hash}-${path.basename(thumb)}`);
        const imgTo = path.join(path.dirname(task.path), `${hash}-${path.basename(task.path)}`);
        await Promise.all([
            renameAndLog(thumb, thumbTo),
            renameAndLog(task.path, imgTo)
        ]);
    } catch (e) {
        console.error(`Error: ${e}`);
    } finally {
        if (page) {
            try {
                await page.close();
            } catch (e) {
                console.error(`Could not close page: ${e}`);
            }
        }
    }
};

/**
 * Work queue running at most `concurrency` tasks at a time.
 *
 * The worker is a plain callback-style function that delegates to
 * `captureTask` and then always signals completion. The task URL is handed
 * to `callback` as its first argument because the completion handlers
 * registered with `q.push` in this file read it from that position.
 */
const q = A.queue((task, callback) => {
    captureTask(task).then(() => callback(task.url));
}, concurrency);
// Shutdown is coordinated between the queue and the input stream: the browser
// may only be closed once stdin has ended AND no task is waiting or running.
let inputClosed = false;
let shutdownStarted = false;

/**
 * Closes the shared browser exactly once. Never rejects; a failure to close
 * is logged.
 *
 * @returns {Promise<void>}
 */
const shutdown = async () => {
    if (shutdownStarted) {
        return;
    }
    shutdownStarted = true;
    console.log('All tasks processed');
    try {
        if (browser) {
            await browser.close();
        }
    } catch (e) {
        console.error(`Could not close browser: ${e}`);
    }
};

// The queue can run empty while more lines are still arriving on stdin, so
// draining only triggers shutdown after the input has been fully read.
// Assigned as a property, matching the queue API style this file relies on.
q.drain = async function () {
    if (inputClosed) {
        await shutdown();
    }
};

// Each stdin line is expected to be "<relative path> <url>", separated by
// whitespace. The path is resolved against `base`.
readline.createInterface({
    input: process.stdin
}).on('line', line => {
    const trimmed = line.trim();
    if (trimmed === '') {
        // Blank lines carry no task.
        return;
    }

    const [relPath, url] = trimmed.split(/\s+/);
    if (url === undefined) {
        console.error(`Skipping malformed line (expected "<path> <url>"): ${line}`);
        return;
    }

    const fpath = path.join(base, relPath);
    if (!fs.existsSync(fpath) || force) {
        q.push({
            path: fpath,
            url
        }, function (doneUrl) {
            console.log(`Done with URL ${doneUrl}`);
        });
    } else {
        console.log(`Not processing ${line}: file exists!`);
    }
}).on('close', () => {
    inputClosed = true;
    // q.length() counts tasks still waiting to start at this moment.
    console.log(`All data parsed: ${q.length()} items enqueued`);
    if (q.idle()) {
        // Nothing was enqueued, or everything already finished: drain will
        // not fire again, so close the browser here. shutdown() never
        // rejects, so the promise is intentionally not awaited.
        shutdown();
    }
});
})();