-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscrape_list.js
74 lines (65 loc) · 1.86 KB
/
scrape_list.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
// @ts-check
/**
* Run in the browser console on the first page of the Drupal content listing.
*/
/**
 * Fetch a resource and parse its body as an HTML document.
 *
 * @param {RequestInfo} input - URL or Request to fetch.
 * @param {RequestInit} [init] - Optional fetch options, passed through as-is.
 * @returns {Promise<Document>} The response body parsed as `text/html`.
 * @throws {Error} If the response status is not in the 2xx range.
 */
async function fetchDocument(input, init) {
  const res = await fetch(input, init);
  if (!res.ok) {
    // fetch() only rejects on network failure; without this check an HTTP
    // error page would be parsed and returned as if it were real content.
    throw new Error(`Request failed: ${res.status} ${res.statusText}`);
  }
  const text = await res.text();
  return new DOMParser().parseFromString(text, 'text/html');
}
/**
 * Get the link to the page that follows the current one in the pager.
 *
 * Looks up the `.pager-current` element and reads the `href` of the first
 * anchor inside its next sibling element.
 *
 * @param {ParentNode} page - Document or fragment to search.
 * @returns {string | null} The next page's URL, or null when there is no
 *   current-page marker, no following sibling, or no link inside it.
 */
function nextPage(page) {
  const current = page.querySelector('.pager-current');
  if (current == null) {
    // No current-page marker at all (presumably a listing without a pager).
    return null;
  }
  const next = current.nextElementSibling;
  if (next == null) {
    return null; // Last page.
  }
  const anchor = next.querySelector('a');
  return anchor == null ? null : anchor.href;
}
/**
 * Extract all the posts listed in this page's content table.
 *
 * Rows are selected with `.table-select-processed tbody tr` and read by cell
 * position: 2nd = title, 3rd = type, 4th = author, 6th = updated date. The
 * 1st and 5th cells are ignored.
 *
 * @param {ParentNode} page - Document or fragment containing the table.
 * @returns {Generator<{
 *   title: string | null,
 *   link: string | null,
 *   type: string | null,
 *   author: { username: string | null, link: string | null },
 *   date: string | null
 * }>} One record per data row; a `link` is null when its cell has no anchor.
 */
function* findLinks(page) {
  // Read the href of the first anchor in a cell, tolerating cells without one
  // so a single unlinked title/author does not abort the whole scrape.
  const hrefOf = (cell) => {
    const anchor = cell.querySelector('a');
    return anchor == null ? null : anchor.href;
  };

  const rows = page.querySelectorAll('.table-select-processed tbody tr');
  for (const row of rows) {
    if (row.children.length < 6) {
      // Not a data row (presumably an empty-table placeholder); skip it.
      continue;
    }
    const [, titleCell, typeCell, authorCell, , updatedCell] = row.children;
    yield {
      title: titleCell.textContent,
      link: hrefOf(titleCell),
      type: typeCell.textContent,
      author: {
        username: authorCell.textContent,
        link: hrefOf(authorCell),
      },
      date: updatedCell.textContent,
    };
  }
}
/**
 * Scrape the current page, download the result, then move to the next page.
 *
 * Serialises every record produced by `findLinks(document)` to JSON, triggers
 * a download of it named "drupal.json", and navigates to the URL returned by
 * `nextPage(document)` when there is one.
 */
function main() {
  const linkData = JSON.stringify(Array.from(findLinks(document)));
  const file = new Blob([linkData], { type: 'text/plain' });
  const url = URL.createObjectURL(file);

  // Make a link element and click it to download the JSON file.
  const download = document.createElement('a');
  download.href = url;
  download.download = 'drupal.json';
  download.dispatchEvent(new MouseEvent('click'));
  URL.revokeObjectURL(url);

  const next = nextPage(document);
  if (next != null) {
    // Only navigate when a next page exists: location.assign(null) would
    // stringify the argument and try to load a bogus "null" URL.
    location.assign(next);
  }
}
// Entry point: process the current page as soon as the script is evaluated.
main();