-
Notifications
You must be signed in to change notification settings - Fork 1
/
readable.js
64 lines (50 loc) · 1.58 KB
/
readable.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env node
const axios = require("axios");
const { Readability } = require("@mozilla/readability");
const { JSDOM } = require("jsdom");
const fs = require("fs");
const TurndownService = require("turndown");
/**
 * Command-line entry point.
 *
 * Reads the arguments that follow the script path, strips the first "-md"
 * flag if one is present, and passes the URL and output file on to
 * fetchAndExtractContent. When either positional argument is missing, a
 * usage message is printed to stderr and the process exits with status 1.
 *
 * @returns {Promise<void>} Settles once fetchAndExtractContent settles.
 */
async function main() {
  const cliArgs = process.argv.slice(2);

  // Only the first "-md" counts as the flag; a later occurrence stays in the
  // positional list.
  const flagIndex = cliArgs.indexOf("-md");
  const shouldConvertToMarkdown = flagIndex !== -1;
  const positional = shouldConvertToMarkdown
    ? [...cliArgs.slice(0, flagIndex), ...cliArgs.slice(flagIndex + 1)]
    : cliArgs;

  const [url, outputFile] = positional;
  if (!(url && outputFile)) {
    console.error("Usage: node extractContent.js [-md] <URL> <output_file>");
    process.exit(1);
  }

  await fetchAndExtractContent(url, outputFile, shouldConvertToMarkdown);
}
/**
 * Downloads a web page, extracts its main article with Readability, and
 * writes the result to a file.
 *
 * The returned promise settles only after the output file has been written
 * or a failure has been reported, so callers that await it can rely on the
 * file being on disk. Failures are never thrown: each one is logged to
 * stderr and recorded by setting process.exitCode to 1.
 *
 * @param {string} url - Page to fetch; also given to JSDOM as the document URL.
 * @param {string} outputFile - Path the extracted content is written to.
 * @param {boolean} shouldConvertToMarkdown - When truthy, the extracted HTML
 *   is converted with Turndown before it is written.
 * @returns {Promise<void>}
 */
async function fetchAndExtractContent(url, outputFile, shouldConvertToMarkdown) {
  // Phase 1: download. Kept in its own try so that only real network/HTTP
  // failures are reported as fetch errors.
  let data;
  try {
    ({ data } = await axios.get(url));
  } catch (error) {
    console.error("Error fetching the URL:", error);
    process.exitCode = 1;
    return;
  }

  // Phase 2: parse the page and optionally convert it to Markdown.
  let content;
  try {
    const dom = new JSDOM(data, { url });
    const article = new Readability(dom.window.document).parse();
    if (!article) {
      console.error("Error parsing the main content");
      process.exitCode = 1;
      return;
    }
    content = shouldConvertToMarkdown
      ? new TurndownService().turndown(article.content)
      : article.content;
  } catch (error) {
    console.error("Error parsing the main content:", error);
    process.exitCode = 1;
    return;
  }

  // Phase 3: write. Awaiting the promise API (instead of the callback API)
  // keeps this function's promise pending until the write has finished.
  try {
    await fs.promises.writeFile(outputFile, content);
  } catch (err) {
    console.error("Error writing the main content to the output file:", err);
    process.exitCode = 1;
    return;
  }
  console.log("Main content extracted and saved to:", outputFile);
}
// Run the CLI. main() returns a promise; report any unexpected rejection and
// mark the process as failed instead of leaving the promise floating.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});