-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.js
169 lines (142 loc) · 5.83 KB
/
app.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
const puppeteer = require("puppeteer-extra");
const StealthPlugin = require("puppeteer-extra-plugin-stealth");
puppeteer.use(StealthPlugin());
const fs = require('fs');
const location = process.argv[2];
/**
 * Pause helper for async code.
 *
 * @param {number} ms - Delay handed to `setTimeout`, in milliseconds.
 * @returns {Promise<undefined>} Promise that fulfils (with no value) once the timeout fires.
 */
function timer(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
// Search keyword that divide() places in the `q` parameter of the map URL.
let queryString = "식당";
// Quadrant numbers (1-4) of the divide() calls currently in progress, outermost first;
// doPuppeteer() prints it when the crawl fails.
let path = [];
/**
 * Reads every place entry currently rendered in the search-result list.
 *
 * The positional `children[...]` lookups mirror the result-item markup the
 * crawler was written against; a missing node makes the evaluate call reject.
 *
 * @param {object} page - Puppeteer page showing a search result list.
 * @returns {Promise<Array<{title: string, category: string, address: string, detailLink: string}>>}
 */
async function extractVisiblePlaces(page) {
  return page.evaluate(() => {
    const items = document.getElementsByClassName("PlaceItem clickArea");
    return Array.from(items, (item) => {
      const header = item.children[2];
      const info = item.children[4];
      return {
        title: header.getElementsByClassName("link_name")[0].innerText,
        category: header.getElementsByClassName("subcategory")[0].innerText,
        address: info.children[1].children[0].innerText,
        detailLink: info.children[5].getElementsByClassName("moreview")[0].href,
      };
    });
  });
}

/**
 * Appends each place to ./places.json as a JSON object followed by ",\n\t".
 *
 * @param {Array<object>} places - Entries returned by extractVisiblePlaces().
 */
function appendPlaces(places) {
  for (const place of places) {
    fs.appendFileSync("./places.json", `${JSON.stringify(place)},\n\t`);
  }
}

/**
 * Crawls the search results for the rectangle spanned by two corners.
 *
 * The map is opened centred on the rectangle's midpoint. When the reported
 * result count exceeds 500 the rectangle is split into four quadrants that are
 * crawled recursively at `zoom - 1`; otherwise every result page is walked and
 * the places are appended to ./places.json. A count of 0 does nothing.
 *
 * While a quadrant is being crawled its number (1-4) sits on the module-level
 * `path` stack. It is deliberately not popped when the recursive call throws,
 * so the caller can log where the crawl failed.
 *
 * @param {object} page - Puppeteer page used for navigation and scraping.
 * @param {number[]} leftTop - `[x, y]` of one corner of the rectangle.
 * @param {number[]} rightBottom - `[x, y]` of the opposite corner.
 * @param {number} zoom - Value sent as `urlLevel`.
 * @returns {Promise<void>}
 */
async function divide(page, leftTop, rightBottom, zoom) {
  const mid = [(leftTop[0] + rightBottom[0]) / 2, (leftTop[1] + rightBottom[1]) / 2];

  // Build the URL through URLSearchParams so the keyword is percent-encoded and
  // the parameter separators cannot be mangled.
  const searchUrl = new URL("https://map.kakao.com/");
  searchUrl.searchParams.set("urlX", mid[0]);
  searchUrl.searchParams.set("urlY", mid[1]);
  searchUrl.searchParams.set("urlLevel", zoom);
  searchUrl.searchParams.set("q", queryString);
  searchUrl.searchParams.set("currentBound", "true");

  // Keep retrying until navigation succeeds; report each failure instead of
  // hiding it. The 2 s pause also runs after a successful load.
  let loaded = false;
  while (!loaded) {
    try {
      await page.goto(searchUrl.href);
      loaded = true;
    } catch (err) {
      console.error(`Failed to load ${searchUrl.href}; retrying`, err);
    }
    await timer(2000);
  }

  const countText = await page.evaluate(() => {
    return document.getElementById("info.search.place.cnt").innerText;
  });
  // Strip every thousands separator, not just the first one.
  const resultCnt = Number(countText.replace(/,/g, ""));

  // Normal case: too many results, so split into quadrants and recurse.
  if (resultCnt > 500) {
    const quadrants = [
      [leftTop, mid],
      [[mid[0], leftTop[1]], [rightBottom[0], mid[1]]],
      [[leftTop[0], mid[1]], [mid[0], rightBottom[1]]],
      [mid, rightBottom],
    ];
    for (const [index, [cornerA, cornerB]] of quadrants.entries()) {
      path.push(index + 1);
      await divide(page, cornerA, cornerB, zoom - 1);
      path.pop();
    }
    return;
  }

  if (resultCnt === 0) {
    return;
  }

  // Base case: walk the result pages and store every place.
  // NOTE(review): '.option1' is clicked before paging — its purpose is not visible here; confirm.
  await page.click(".option1");
  while (true) {
    await timer(1000);

    // Check whether a pager exists (per the original note, it is absent when
    // there are 15 or fewer results).
    const isPageExist = await page.evaluate(() => {
      return document.getElementById("info.search.page").className === "pages";
    });

    // No pager: scrape the single list once and stop.
    if (!isPageExist) {
      appendPlaces(await extractVisiblePlaces(page));
      break;
    }

    // Number of the first hidden page button (6 when all five are shown).
    const pageLength = await page.evaluate(() => {
      let ret = 1;
      for (; ret <= 5; ++ret) {
        if (document.getElementById(`info.search.page.no${ret}`).className === "INACTIVE HIDDEN") {
          break;
        }
      }
      return ret;
    });

    // Click each visible page button, then scrape it.
    for (let pageNumber = 1; pageNumber < pageLength; ++pageNumber) {
      await page.click(`#info\\.search\\.page\\.no${pageNumber}`);
      await timer(1000);
      appendPlaces(await extractVisiblePlaces(page));
    }

    // Move to the next group of pages, or stop when "next" is disabled.
    const isEndOfResult = await page.evaluate(() => {
      const nextButton = document.getElementById("info.search.page.next");
      if (nextButton.className === "next disabled") {
        return true;
      }
      nextButton.click();
      return false;
    });
    if (isEndOfResult) {
      break;
    }
  }
}
/**
 * Launches a non-headless browser, runs the recursive crawl over the
 * hard-coded starting rectangle, then shuts the browser down and exits.
 *
 * On failure the error and the current quadrant `path` are printed so the
 * failing region can be identified, and the process exits with status 1
 * (status 0 on success).
 *
 * @returns {Promise<void>} Never settles normally: the process exits first.
 */
async function doPuppeteer() {
  const options = {
    headless: false,
    ignoreHTTPSErrors: true,
    args: ['--window-size=1405,1130'],
  };
  const browser = await puppeteer.launch(options);
  let exitCode = 0;
  try {
    const page = await browser.newPage();
    // Start from the full rectangle at level 13; divide() narrows it down.
    await divide(page, [-783715, 2422386], [1749487, -152403], 13);
  } catch (err) {
    console.log(err);
    console.log(path);
    exitCode = 1;
  } finally {
    // Wait for the browser to shut down before the process is terminated.
    await browser.close();
  }
  process.exit(exitCode);
}
doPuppeteer();