-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
32 lines (29 loc) · 1.16 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
const path = require('path');
const fs = require('fs');
const WebCrawler = require('./src/web_crawler');
const { sortMapKeys } = require('./src/utils');
// Resolve the crawl entry point: the first CLI argument takes precedence,
// then the START_URL environment variable. There is deliberately no built-in
// default (an example value would be 'http://wiprodigital.com').
const [, , cliStartUrl] = process.argv;
const START_URL = cliStartUrl || process.env.START_URL;
if (!START_URL) {
  throw new Error('Pass start URL as first CLI parameter or set "START_URL" environment variable');
}
/**
 * Writes a crawl result to `<__dirname>/results/<file_name>.json`.
 *
 * Each of the four URL collections is passed through `sortMapKeys` and the
 * resulting object is written as pretty-printed JSON (2-space indent).
 *
 * @param {object} crawl_result - Object exposing `linkUrls`, `mediaUrls`,
 *   `externalUrls` and `deadLinks` (presumably maps keyed by URL — confirm
 *   against the crawler).
 * @param {string} file_name - Output name without the `.json` extension. It
 *   may contain path separators; the intermediate directories are created.
 * @throws {Error} Any file-system error raised while creating the output
 *   directory or writing the file is propagated to the caller.
 */
const saveToJson = (crawl_result, file_name) => {
  const file_dir = path.resolve(__dirname, 'results');
  const file_path = `${path.resolve(file_dir, file_name)}.json`;

  // Create the directory that will actually hold the file. `recursive: true`
  // makes this a no-op when it already exists (no exists-then-create race) and
  // also covers names such as "example.com/docs" that nest below `results`.
  fs.mkdirSync(path.dirname(file_path), { recursive: true });

  console.log(`Exporting results to ${file_path}`);
  const data = {
    linkUrls: sortMapKeys(crawl_result.linkUrls),
    mediaUrls: sortMapKeys(crawl_result.mediaUrls),
    externalUrls: sortMapKeys(crawl_result.externalUrls),
    deadLinks: sortMapKeys(crawl_result.deadLinks),
  };
  fs.writeFileSync(file_path, JSON.stringify(data, null, 2));
};
/**
 * Script entry point: crawls START_URL and exports the result as JSON.
 *
 * The output file is named after the URL's hostname. Parsing with `URL`
 * (instead of splitting on "//") keeps paths, query strings and ports out of
 * the file name, and rejects a malformed start URL before any crawling begins.
 *
 * @returns {Promise<void>} Settles once the results file has been written.
 * @throws {TypeError} If START_URL is not a valid absolute URL.
 */
async function main() {
  const crawler = new WebCrawler();
  const { hostname } = new URL(START_URL);
  console.time('crawl');
  saveToJson(await crawler.crawl(START_URL), hostname);
  console.timeEnd('crawl');
  console.log('Crawl complete!');
}

// Never leave the promise floating: report the failure and signal it through
// the exit code instead of relying on the runtime's unhandled-rejection policy.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});