forked from tcort/markdown-link-check
-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.js
215 lines (179 loc) · 8.15 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
'use strict';
const async = require('async');
const linkCheck = require('link-check');
const LinkCheckResult = require('link-check').LinkCheckResult;
const markdownLinkExtractor = require('markdown-link-extractor');
const ProgressBar = require('progress');
const envVarPatternMatcher = /(?<pattern>{{env\.(?<name>[a-zA-Z0-9\-_]+)}})/;

/*
 * Performs some special replacements for the following patterns:
 * - {{BASEURL}} - replaced with opts.projectBaseUrl
 * - {{env.<env_var_name>}} - replaced with the environment variable named <env_var_name>,
 *   or with an empty string if that variable is not set
 *
 * Every occurrence of each pattern is replaced. Substituted values are inserted
 * verbatim: `$`-sequences such as `$&` or `$$` inside a value are NOT interpreted as
 * replacement patterns, and inserted environment values are not scanned again for
 * further {{env.*}} placeholders (so a value that mentions its own placeholder
 * cannot cause an endless loop).
 *
 * @param {string} str - text that may contain the placeholders
 * @param {object} opts - options object; only opts.projectBaseUrl is read here
 * @returns {string} the text with all placeholders substituted
 */
function performSpecialReplacements(str, opts) {
    // replace the `{{BASEURL}}` with the opts.projectBaseUrl. Helpful to build absolute urls "relative" to project roots.
    // A replacer function is used so that a `$` inside the base URL is copied literally.
    const withBaseUrl = str.replace(/\{\{BASEURL\}\}/g, () => String(opts.projectBaseUrl));

    // Global copy of the shared matcher: one pass over the string handles every placeholder.
    const everyEnvVarPattern = new RegExp(envVarPatternMatcher.source, 'g');

    // replace {{env.<env_var_name>}} with the corresponding environment variable or an empty string if none is set.
    return withBaseUrl.replace(everyEnvVarPattern, (_match, _pattern, name) => {
        const value = process.env[name];
        // Anything that is not a real (string) environment value counts as "not set".
        return typeof value === 'string' ? value : '';
    });
}
/*
 * Returns the markdown with fenced code blocks removed.
 *
 * A block starts at a line beginning with three backticks and ends at the next
 * line that consists of exactly three backticks. Only the fenced text itself is
 * removed; the line breaks around it stay in place.
 */
function removeCodeBlocks(markdown) {
    const fencedBlock = /^```[\S\s]+?^```$/gm;
    const withoutFencedBlocks = markdown.replace(fencedBlock, '');
    return withoutFencedBlocks;
}
/*
 * Collects anchor targets that are declared through raw HTML in the markdown.
 *
 * Fenced code blocks, HTML comments and inline code spans (delimited by backticks
 * that are not escaped with "\") are stripped first, so markup that only appears
 * inside them is not reported.
 *
 * Returns an array holding the values of every `id="..."` attribute found in a
 * tag, followed by the values of every `name="..."` attribute found in a tag
 * starting with `<a`.
 */
function extractHtmlSections(markdown) {
    const htmlComment = /<!--[\S\s]+?-->/gm;
    const inlineCode = /(?<!\\)`[\S\s]+?(?<!\\)`/gm;
    const tagWithId = /<(?<tag>[^\s]+).*?id=["'](?<id>[^"']*?)["'].*?>/gmi;
    const anchorWithName = /<a.*?name=["'](?<name>[^"']*?)["'].*?>/gmi;

    let stripped = removeCodeBlocks(markdown);
    stripped = stripped.replace(htmlComment, '');
    stripped = stripped.replace(inlineCode, '');

    const ids = [];
    for (const found of stripped.matchAll(tagWithId)) {
        ids.push(found.groups.id);
    }

    const names = [];
    for (const found of stripped.matchAll(anchorWithName)) {
        names.push(found.groups.name);
    }

    return [...ids, ...names];
}
/*
 * Derives the anchor names of all ATX headings ("# Title") in the markdown.
 *
 * Each heading is turned into a URL-encoded slug: link markup is reduced to its
 * link text, the text is lower-cased, the leading "#"s are dropped, everything
 * except letters, numbers, white space, "_" and "-" is removed and white space
 * becomes "-". A heading whose slug was already produced by an earlier heading
 * gets a numeric suffix ("-1", "-2", ...).
 *
 * Headings inside fenced code blocks are ignored.
 *
 * @param {string} markdown - the markdown source
 * @returns {string[]} the distinct anchor names, without a leading "#"
 */
function extractSections(markdown) {
    // First remove code blocks.
    markdown = removeCodeBlocks(markdown);

    const sectionTitles = markdown.match(/^#+ .*$/gm) || [];

    const sections = sectionTitles.map(section =>
        // The links are compared with the headings (simple text comparison).
        // However, the links are url-encoded beforehand, so the headings
        // have to also be encoded so that they can also be matched.
        encodeURIComponent(
            section
                // replace links, the links can start with "./", "/", "http://", "https://" or "#"
                // and keep the value of the text ($1). The match is lazy and global so that
                // every link in a heading is reduced on its own instead of one match
                // swallowing the text between the first "[" and the last ")".
                .replace(/\[(.+?)\]\(((?:\.?\/|https?:\/\/|#)[\w\d./?=#-]+)\)/g, "$1")
                // make everything (Unicode-aware) lower case
                .toLowerCase()
                // remove white spaces and "#" at the beginning
                .replace(/^#+\s*/, '')
                // remove everything that is NOT a (Unicode) Letter, (Unicode) Number decimal,
                // (Unicode) Number letter, white space, underscore or hyphen
                // (backticks are kept here and dropped in the next step; "*" is already removed by this step)
                // https://ruby-doc.org/3.3.2/Regexp.html#class-Regexp-label-Unicode+Character+Categories
                .replace(/[^\p{L}\p{Nd}\p{Nl}\s_\-`]/gu, "")
                // remove leftover backticks
                .replace(/`/gu, "")
                // Now replace remaining blanks with '-'
                .replace(/\s/gu, "-")
        )
    );

    // slug -> number of duplicates seen so far. A Map is used instead of a plain
    // object so that slugs such as "constructor" or "tostring" are not confused
    // with properties inherited from Object.prototype.
    const duplicateCounts = new Map();
    for (const section of sections) {
        let anchor = section;
        if (duplicateCounts.has(section)) {
            const occurrence = duplicateCounts.get(section) + 1;
            duplicateCounts.set(section, occurrence);
            anchor = section + '-' + occurrence;
        }
        duplicateCounts.set(anchor, 0);
    }

    return [...duplicateCounts.keys()];
}
module.exports = function markdownLinkCheck(markdown, opts, callback) {
if (arguments.length === 2 && typeof opts === 'function') {
// optional 'opts' not supplied.
callback = opts;
opts = {};
}
if(!opts.ignoreDisable) {
markdown = [
/(<!--[ \t]+markdown-link-check-disable[ \t]+-->[\S\s]*?<!--[ \t]+markdown-link-check-enable[ \t]+-->)/mg,
/(<!--[ \t]+markdown-link-check-disable[ \t]+-->[\S\s]*(?!<!--[ \t]+markdown-link-check-enable[ \t]+-->))/mg,
/(<!--[ \t]+markdown-link-check-disable-next-line[ \t]+-->\r?\n[^\r\n]*)/mg,
/([^\r\n]*<!--[ \t]+markdown-link-check-disable-line[ \t]+-->[^\r\n]*)/mg
].reduce(function(_markdown, disablePattern) {
return _markdown.replace(new RegExp(disablePattern), '');
}, markdown);
}
const links = markdownLinkExtractor(markdown);
const sections = extractSections(markdown).concat(extractHtmlSections(markdown));
const linksCollection = [...new Set(links)]
const bar = (opts.showProgressBar) ?
new ProgressBar('Checking... [:bar] :percent', {
complete: '=',
incomplete: ' ',
width: 25,
total: linksCollection.length
}) : undefined;
async.mapLimit(linksCollection, 2, function (link, callback) {
if (opts.ignorePatterns) {
const shouldIgnore = opts.ignorePatterns.some(function(ignorePattern) {
return ignorePattern.pattern instanceof RegExp ? ignorePattern.pattern.test(link) : (new RegExp(ignorePattern.pattern)).test(link) ? true : false;
});
if (shouldIgnore) {
const result = new LinkCheckResult(opts, link, 0, undefined);
result.status = 'ignored'; // custom status for ignored links
callback(null, result);
return;
}
}
if (opts.replacementPatterns) {
for (let replacementPattern of opts.replacementPatterns) {
let pattern = replacementPattern.pattern instanceof RegExp ? replacementPattern.pattern : new RegExp(replacementPattern.pattern, replacementPattern.global ? 'g' : '');
link = link.replace(pattern, performSpecialReplacements(replacementPattern.replacement, opts));
}
}
// Make sure it is not undefined and that the appropriate headers are always recalculated for a given link.
opts.headers = {};
if (opts.httpHeaders) {
for (const httpHeader of opts.httpHeaders) {
if (httpHeader.headers) {
for (const header of Object.keys(httpHeader.headers)) {
httpHeader.headers[header] = performSpecialReplacements(httpHeader.headers[header], opts);
}
}
for (const url of httpHeader.urls) {
if (link.startsWith(url)) {
Object.assign(opts.headers, httpHeader.headers);
// The headers of this httpHeader has been applied, the other URLs of this httpHeader don't need to be evaluated any further.
break;
}
}
}
}
let sectionLink = null;
if (link.startsWith('#')) {
sectionLink = link;
}
else if ('baseUrl' in opts && link.startsWith(opts.baseUrl)) {
if (link.substring(opts.baseUrl.length).match(/^\/*#/)) {
sectionLink = link.replace(/^[^#]+/, '');
}
}
if (sectionLink) {
const result = new LinkCheckResult(opts, sectionLink, sections.includes(sectionLink.substring(1)) ? 200 : 404, undefined);
callback(null, result);
return;
}
linkCheck(link, opts, function (err, result) {
if (opts.showProgressBar) {
bar.tick();
}
if (err) {
result = new LinkCheckResult(opts, link, 500, err);
result.status = 'error'; // custom status for errored links
}
callback(null, result);
});
}, callback);
};