-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathRequestedArticleSifter.js
141 lines (116 loc) · 4.93 KB
/
RequestedArticleSifter.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
// <nowiki>
// [[Wikipedia:Requested Articles]] Cleanup Tool
// When the user EDITS a page starting with [[Wikipedia:Requested articles/Business and economics/Companies]], this script adds a "Remove Bullets With Too Few References" button that the user can click.
// Clicking it will delete all bullets with 1 or 0 URLs, because these entries will not pass [[WP:GNG]].
// Certain things will safelist the bullet, including clerk endorsement, a link to a draft, and some other conditions.
// The [[WP:RA]] company lists are massive. So far, this tool has helped delete 150 companies that did not follow our submission instructions that require providing enough sources.
'use strict';
/**
 * Removes under-sourced company bullets from requested-articles wikicode.
 *
 * After a call to deleteTooFewReferences(), getLog() and getEditSummary()
 * describe that most recent call.
 */
class CleanUp {
	constructor() {
		// Debug log text for the most recent deleteTooFewReferences() call.
		this.log = '';
		// Number of bullets removed by the most recent deleteTooFewReferences() call.
		this.deleteCount = 0;
	}

	/**
	 * Split the wikicode into chunks and drop every company bullet that has
	 * fewer than 2 references and is not safelisted.
	 *
	 * A chunk starts at "\n{{", "\n==" or "\n*" (template, section heading, or
	 * bullet) and runs to the next such delimiter. Only chunks matching
	 * "*[[company name]]" are considered for deletion; all others are copied
	 * through unchanged.
	 *
	 * @param {string} code Wikicode of the page being edited.
	 * @return {string} The wikicode with under-sourced bullets removed.
	 */
	deleteTooFewReferences( code ) {
		// \n{{ and \n== and \n*
		// templates, sections, and bullets (of bulleted list)
		const sectionDelimiters = /(\n\{\{|\n==|\n\*)/g;
		// *[[company name]]
		const companyNameRegEx = / *\* *\[\[(.+?)\]\]/;
		// A reference is either a bare "http" occurrence or a complete <ref>...</ref> tag.
		const countRegEx = /(http|<ref.+?<\/ref>)/g;
		// Clerk templates, draft mentions, and Google search links exempt a bullet.
		// Both {{endorse}} and {{endorsed}} are accepted so neither spelling is deleted.
		const safelistedRegEx = /(\{\{[Ee]ndorsed?\}\}|\{\{[Ll]ikely\}\}|[Dd]raft|google\.com\/search)/;
		// Links to a Wikipedia whose subdomain is not "en" (desktop or ".m" mobile).
		// The lookahead rejects the whole label "en" (plus "www" and a bare "m"),
		// so language codes that merely end in "e" or "n" (de, he, nn, ...) still match.
		const otherLanguageWikiRegEx = /\b(?!(?:en|www|m)\.)[a-z-]+(?:\.m)?\.wikipedia\.org/i;

		// Record the start offset of every chunk; the first chunk always starts at 0.
		const indices = [ 0 ];
		let delimiter;
		while ( ( delimiter = sectionDelimiters.exec( code ) ) ) {
			indices.push( delimiter.index );
		}

		let output = '';
		let entriesLog = '';
		let deleteCount = 0;
		let safelistedCount = 0;
		let totalCount = 0;

		// Loop through every chunk.
		// A chunk is usually one line/bullet/entry, but can also be a section or a template.
		for ( let i = 0; i < indices.length; i++ ) {
			const startPos = indices[ i ];
			// Test for "no next chunk" by position, not truthiness: a next offset
			// of 0 is valid when the text itself begins with a delimiter.
			const endPos = ( i + 1 < indices.length ) ? indices[ i + 1 ] : code.length;
			const chunk = code.slice( startPos, endPos );

			// Anything that is not a company bullet is kept untouched.
			const nameMatch = chunk.match( companyNameRegEx );
			if ( !nameMatch ) {
				output += chunk;
				continue;
			}
			const companyName = nameMatch[ 1 ];
			totalCount++;

			const count = ( chunk.match( countRegEx ) || [] ).length;
			entriesLog += totalCount + ' Company Name: ' + companyName + ' || Ref Count: ' + count;

			/* skip if
				- clerk {{endorse}}/{{endorsed}} or {{likely}}
				- draft created
				- links to wikipedia articles in other languages
				- link to google search
			*/
			if ( safelistedRegEx.test( chunk ) || otherLanguageWikiRegEx.test( chunk ) ) {
				safelistedCount++;
				entriesLog += ' || SAFELISTED ====================================\n';
				output += chunk;
				continue;
			}

			// Fewer than 2 references: drop the chunk by not copying it to the output.
			if ( count < 2 ) {
				deleteCount++;
				entriesLog += ' || DELETED ************************************\n';
			} else {
				output += chunk;
				entriesLog += '\n';
			}
		}

		// Both the log and the delete count describe this call only.
		this.log = 'WP:RA Cleanup Tool - Debug Log\nDeleted Entries: ' + deleteCount +
			'\nSafelisted: ' + safelistedCount +
			'\nTotal Companies Before: ' + totalCount +
			'\nTotal Companies After: ' + ( totalCount - deleteCount ) +
			'\n\n' + entriesLog;
		this.deleteCount = deleteCount;
		return output;
	}

	/**
	 * @return {string} Debug log of the most recent deleteTooFewReferences() call.
	 */
	getLog() {
		return this.log;
	}

	/**
	 * @return {string} Edit summary for the most recent run, or '' if nothing was deleted.
	 */
	getEditSummary() {
		if ( !this.deleteCount ) {
			return '';
		}
		return 'Delete ' + this.deleteCount + ' companies with insufficient sources (0 or 1 sources). Need at least 2, preferably 3 or more, to prove [[WP:GNG]]. See also [[WP:3REFS]]. Using userscript [[User:Novem_Linguae/wp-ra.js]]';
	}
}
// if correct page
// Activate only on the companies request list (and its subpages by prefix),
// and only while the page is being edited or previewed.
const pageName = mw.config.get( 'wgPageName' );
const isCompanyPage = pageName.startsWith( 'Wikipedia:Requested_articles/Business_and_economics/Companies' );
// 'submit' is the action reported while showing an edit preview.
const isEditPage = [ 'edit', 'submit' ].includes( mw.config.get( 'wgAction' ) );
if ( isCompanyPage && isEditPage ) {
	// The same button is shown twice: above the page body and in the edit options.
	// It must have type="button", or it will submit the form.
	const buttonHtml = '<button type="button" class="wp-ra-button" style="margin-bottom: 1em;">Remove Bullets With Too Few References</button>';
	// Hidden until the first run, then filled with the debug log.
	const logBoxHtml = '<textarea id="wp-ra-log" style="display: none; height: 15em;"></textarea>';

	$( '#bodyContent' ).prepend( buttonHtml );
	$( '.editOptions' ).prepend( buttonHtml + '\n' + logBoxHtml + '\n' );

	// Runs the cleanup against the edit box and reports what happened.
	const runCleanUp = () => {
		const cleaner = new CleanUp();

		// Replace the value of the edit textarea with the filtered wikicode.
		const $editBox = $( '#wpTextbox1' );
		const filteredCode = cleaner.deleteTooFewReferences( $editBox.val() );
		$editBox.val( filteredCode );

		// Reveal the debug log.
		const $logBox = $( '#wp-ra-log' );
		$logBox.val( cleaner.getLog() );
		$logBox.show();

		// Pre-fill the edit summary.
		$( '#wpSummary' ).val( cleaner.getEditSummary() );
	};

	$( '.wp-ra-button' ).on( 'click', runCleanUp );
}
/*
TODO:
- google search links should not be exempt
- Look through other WP:RA's for weird formatting. Make sure this can handle that.
- Need to handle nested bulleted lists.
- make sure {{isbn|18927639821}} and {{ISBN|892037923}} are counted
*/
// </nowiki>