-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawler.js
71 lines (57 loc) · 2.15 KB
/
crawler.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
const path = require("path");
const fs = require("fs");
const cheerio = require("cheerio");
const sa = require("superagent");
/**
*
* @param {String} url:https://huaban.com/explore/guzhen
* @param {Number} limit:img number
* @param {Boolean} isCut:is cut image
*/
module.exports = function({ query, limit }, isCut) {
return new Promise((resolve, rejected) => {
let baseUrl = "https://huaban.com/search/";
query = require("querystring").escape(query);
let url = `${baseUrl}?q=${query}&type=pins&k2q1tu9d&page=3&per_page=${limit}&wfl=1`;
sa.get(url)
.then(res => {
const $ = cheerio.load(res.text);
// fs.writeFile(path.resolve(__dirname, "huaban.html"), res.text, err => {
// if (err) {
// throw err;
// }
// });
let imgs = {};
const eles = $("body").find("script");
let contents = eles
.first()
.contents()
.text();
let reg = /.*app\.page\["pins"\]\s+=\s+(\[{.+}\])/gm,
result,
source;
source = reg.exec(contents);
if (!source || source.length === 0) {
rejected("没有查询到相关图片");
}
result = JSON.parse(source[1]);
result.forEach((item, index) => {
let imgCdn = "http://hbimg.huabanimg.com";
let { file } = item;
imgs[index] = `${imgCdn}/${file.key}${isCut ? "_fw236" : ""}?width=${file.width}&height=${
file.height
}`;
});
let data = JSON.stringify(imgs, null, " ");
fs.writeFile(path.resolve(__dirname, "data.json"), data, err => {
if (err) {
throw err;
}
});
resolve(data);
})
.catch(err => {
rejected("采集发生错误");
});
});
};