-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathDetectPromo.js
315 lines (291 loc) · 6.57 KB
/
DetectPromo.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
// <nowiki>
/*
- Let reviewer know when certain promotional and POV keywords are detected.
- Displays an orange bar at the top of the article, listing the detected keywords.
*/
/**
 * Scans the wikitext of the page being viewed for promotional / POV terms and, when any
 * are found, inserts an orange bar listing them before the `#contentSub` element.
 */
class DetectPromo {
	/**
	 * Terms to search for, one per line (", " is also accepted as a separator).
	 * Lines starting with `//` are treated as comments and skipped. Duplicates are
	 * harmless; they are removed by getWordsToSearchArray().
	 *
	 * Every entry is matched as a whole word/phrase, so stems do not work: list each
	 * inflected form explicitly (e.g. "leverage", "leveraged", "leveraging").
	 *
	 * @member {string}
	 */
	wordsToSearchString = `
% growth
100%
6-figure
7-figure
8-figure
9-figure
B2B
B2C
a record
acclaimed
accomplished
are a necessity
around the world
award winning
award-winning
beloved
best available
bestselling
boasts
comprehensive
countless hours
create a revolution
critical acclaim
critical acclaim
disrupt
drastically
dynamic
elevate
eminent
engaging
entrepreneur
evangelist
excelled
exceptional
exemplified
exemplify
expert
expertise
extensive
famous
fascinating
fast growing
fast-growing
fastest growing
fastest-growing
finest
fully integrated
fully-integrated
globally
globally recognized
growing popularity
highlights
highly accomplished
highly praised
highly specialized
historic
honored with
hypnotic
illustrious
impressive
indelible
inexhaustible
influential
innovation
innovative
insights
inspired by
integrate
invaluable
invaluable
leader in
leading
leading
legendary
leverage
leveraged
leveraging
massive
mastermind
more than
most highly
most important
most impressive
most notable
mystical
natural charm
noteworthy
numerous
organically
outstanding
perfect
philanthropist
picturesque
pioneer
pioneering
popular destination
popularity
premiere
prestigious
prominence
prominent
promising
promulgator
ranked
reinvent
remarkable
remarkable
renowed
renowned
resonating
respected
revolutionary
rising star
save millions
savvy
seamless
sensual
several offers
showcased
signature
significant
soulful
spanning
state of art
state of the art
state-of-art
state-of-the-art
striking
super famous
tailored
tranquility
transcend
transform
underpin
ventured into
very first
visionary
wide selection
widely used
world class
world-class
worldwide
zero to hero
`;

	/**
	 * @param {Object} mw MediaWiki global; `mw.config.get()` and `mw.Api` are used
	 * @param {jQuery} $ jQuery function used to locate `#contentSub`
	 */
	constructor( mw, $ ) {
		this.mw = mw;
		// eslint-disable-next-line no-jquery/variable-pattern
		this.$ = $;
	}

	/**
	 * Entry point: if the current page qualifies, fetch its wikitext, search it for
	 * the configured terms and display whatever was found.
	 *
	 * @return {Promise<void>}
	 */
	async execute() {
		if ( !this.shouldRunOnThisPage() ) {
			return;
		}

		const wordsToSearchArray = this.getWordsToSearchArray();
		const title = this.mw.config.get( 'wgPageName' );
		const wikicode = this.cleanWikicode( await this.getWikicode( title ) );
		const searchResultsArray = this.getSearchResultsArray( wordsToSearchArray, wikicode );
		const searchResultsString = this.getSearchResultsString( searchResultsArray );
		this.displayHtml( searchResultsString );
	}

	/**
	 * Insert the orange result bar before `#contentSub`. Does nothing when there is
	 * nothing to report.
	 *
	 * @param {string} searchResultsString Already-joined list of detected terms
	 */
	displayHtml( searchResultsString ) {
		if ( !searchResultsString ) {
			return;
		}
		const html = `<div id="DetectPromo" style="background-color: orange; margin-bottom: 5px;"><span style="font-weight: bold;">Promotional words:</span> ${ searchResultsString }</div>`;
		this.$( '#contentSub' ).before( html );
	}

	/**
	 * Join the detected terms into a display string, showing at most 20 of them.
	 * The caller's array is not modified.
	 *
	 * @param {Array} searchResultsArray
	 * @return {string} searchResultsString - Example: `a record, comprehensive, drastically, entrepreneur, expert, leading, massive, more than, most important, numerous, outstanding, ranked, signature, worldwide, significant, ...... and more.`
	 */
	getSearchResultsString( searchResultsArray ) {
		const MAX_DISPLAYED_RESULTS = 20;
		const displayed = searchResultsArray.length > MAX_DISPLAYED_RESULTS ?
			[ ...searchResultsArray.slice( 0, MAX_DISPLAYED_RESULTS ), '...... and more.' ] :
			searchResultsArray;
		return displayed.join( ', ' );
	}

	/**
	 * Return the terms that occur in the wikicode, in the order they were supplied.
	 * Matching is case-insensitive and whole-word.
	 *
	 * @param {Array} wordsToSearchArray
	 * @param {string} wikicode
	 * @return {Array} searchResultsArray
	 */
	getSearchResultsArray( wordsToSearchArray, wikicode ) {
		return wordsToSearchArray.filter( ( word ) => this.buildWordRegEx( word ).test( wikicode ) );
	}

	/**
	 * Build the case-insensitive whole-word regular expression for one term.
	 *
	 * \b can't be used because it misbehaves next to punctuation such as `)`, so
	 * lookarounds are used instead. A lookaround is only added on a side where the term
	 * itself starts/ends with a word character; otherwise a term such as "% growth"
	 * could never match "50% growth" (the digit before "%" would veto the match).
	 *
	 * @param {string} word
	 * @return {RegExp}
	 */
	buildWordRegEx( word ) {
		const leadingBoundary = /^\w/.test( word ) ? '(?<!\\w)' : '';
		const trailingBoundary = /\w$/.test( word ) ? '(?!\\w)' : '';
		return new RegExp( leadingBoundary + this.escapeRegEx( word ) + trailingBoundary, 'i' );
	}

	/**
	 * Strip markup that would cause false negatives or false positives in the search.
	 *
	 * @param {string} wikicode
	 * @return {string} wikicode
	 */
	cleanWikicode( wikicode ) {
		return wikicode
			// eliminate [[ ]], so that phrases with wikilink syntax in the middle don't mess up our search
			.replace( /\[\[/g, '' )
			.replace( /\]\]/g, '' )
			// Eliminate <ref /> and <ref></ref>. It's OK if newspaper articles contain promo words, and
			// they often do. We only want to display promo words found in the article prose.
			// Self-closing tags go first so they can't be mistaken for the opening tag of a pair.
			.replace( /<ref\b[^>]*\/>/gi, '' )
			// The body may contain other tags (<i>, <br>, comments...), so match lazily up to the
			// closing tag, but never across another <ref opening: that keeps one unclosed <ref>
			// from swallowing the prose up to the next reference.
			.replace( /<ref\b[^>]*>(?:(?!<ref\b)[\s\S])*?<\/ref\s*>/gi, '' );
	}

	/**
	 * Parse wordsToSearchString into a de-duplicated array of trimmed, non-empty terms.
	 *
	 * @return {Array} wordsToSearchArray
	 */
	getWordsToSearchArray() {
		// replace comment lines with blank lines. using this approach fixes a bug involving // and comma on the same line
		const withoutCommentLines = this.wordsToSearchString.replace( /^\/\/.*$/gm, '' );
		const words = withoutCommentLines
			.replace( /, /g, '\n' )
			.trim()
			.split( '\n' )
			.map( ( v ) => v.trim() )
			.filter( ( v ) => v !== '' )
			// catches comments that only reach the start of an entry after splitting/trimming
			.filter( ( v ) => !v.startsWith( '//' ) );
		return this.eliminateDuplicates( words );
	}

	/**
	 * Decide from `mw.config` whether the script should do anything on this page.
	 *
	 * @return {boolean}
	 */
	shouldRunOnThisPage() {
		const config = this.mw.config;

		// don't run when not viewing articles
		if ( config.get( 'wgAction' ) !== 'view' ) {
			return false;
		}

		// don't run when viewing diffs
		if ( config.get( 'wgDiffNewId' ) ) {
			return false;
		}

		// no current revision: treated as a deleted page, nothing to scan
		if ( !config.get( 'wgCurRevisionId' ) ) {
			return false;
		}

		// Only run in mainspace (0) and draftspace (118), plus one sandbox page
		const namespace = config.get( 'wgNamespaceNumber' );
		const title = config.get( 'wgPageName' );
		return [ 0, 118 ].includes( namespace ) || title === 'User:Novem_Linguae/sandbox';
	}

	/**
	 * Fetch the wikitext of a page through the `action=parse` API.
	 *
	 * @param {string} title
	 * @return {Promise<string>} wikicode, or '' when the current page has no current revision
	 */
	async getWikicode( title ) {
		const pageIsDeleted = !this.mw.config.get( 'wgCurRevisionId' );
		if ( pageIsDeleted ) {
			return '';
		}

		const api = new this.mw.Api();
		const response = await api.get( {
			action: 'parse',
			page: title,
			prop: 'wikitext',
			formatversion: '2',
			format: 'json'
		} );
		return response.parse.wikitext;
	}

	/**
	 * @param {Array} array
	 * @return {Array} New array with duplicates removed, first occurrences kept in order
	 */
	eliminateDuplicates( array ) {
		return [ ...new Set( array ) ];
	}

	/**
	 * Escape a string so it can be embedded literally in a regular expression.
	 *
	 * @param {string} string
	 * @return {string}
	 */
	escapeRegEx( string ) {
		return string.replace( /[.*+?^${}()|[\]\\]/g, '\\$&' ); // $& means the whole matched string
	}
}
// Run once the DOM is ready.
$( async () => {
	// mw.loader.using() returns a promise, so await it directly. Passing an async function as
	// its `ready` callback would discard that function's promise, leaving execute() un-awaited.
	await mw.loader.using( [ 'mediawiki.api' ] );
	await new DetectPromo( mw, $ ).execute();
} );
// </nowiki>