-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathm-delete.py
291 lines (234 loc) · 10.3 KB
/
m-delete.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
#!/usr/bin/python
"""
This script can be used to delete and undelete pages en masse.
Of course, you will need an admin account on the relevant wiki.
These command line parameters can be used to specify which pages to work on:
¶ms;
Furthermore, the following command line parameters are supported:
-always Don't prompt to delete pages, just do it.
-summary:XYZ Set the summary message text for the edit to XYZ.
-undelete Actually undelete pages instead of deleting.
Obviously makes sense only with -page and -file.
-isorphan Alert if there are pages that link to the page to be
deleted (check 'What links here').
By default it is active and only the summary per namespace
is given.
If given as -isorphan:n, n pages per namespace will be shown,
If given as -isorphan:0, only the summary per namespace will
be shown,
If given as -isorphan:n, with n < 0, the option is disabled.
This option is disregarded if -always is set.
-orphansonly: Specified namespaces. Separate multiple namespace
numbers or names with commas.
Examples:
-orphansonly:0,2,4
-orphansonly:Help,MediaWiki
Note that Main ns can be indicated either with a 0 or a ',':
-orphansonly:0,1
-orphansonly:,Talk
Usage:
python pwb.py delete [-category categoryName]
Examples
--------
Delete everything in the category "To delete" without prompting:
python pwb.py delete -cat:"To delete" -always
"""
#
# (C) Pywikibot team, 2013-2019
#
# Distributed under the terms of the MIT license.
#
from __future__ import absolute_import, division, unicode_literals
import collections
from warnings import warn
import pywikibot
from pywikibot import exceptions
from pywikibot import i18n, pagegenerators
from pywikibot.bot import MultipleSitesBot, CurrentPageBot
from pywikibot.page import Page
from pywikibot.tools import islice_with_ellipsis
# This is required for the text that is shown when you run this script
# with the parameter -help.
# The key has to stay character-for-character identical to the placeholder
# line in the module docstring; it is mapped to pagegenerators.parameterHelp.
# NOTE(review): '¶ms;' looks like a mis-decoded '&params;' -- if that is
# confirmed, the docstring placeholder and this key must be fixed together.
docuReplacements = {'¶ms;': pagegenerators.parameterHelp} # noqa: N816
class PageWithRefs(Page):

    """
    A subclass of Page with convenience methods for reference checking.

    Supports the same interface as Page, with some added methods.
    """

    # Register the lazily built reference table next to the cache
    # attributes inherited from Page. This used to be computed into a
    # local variable inside __init__ and thrown away, so it never took
    # effect. tuple() tolerates the parent declaring any iterable.
    # NOTE(review): presumably the parent's cache clearing iterates over
    # _cache_attrs -- confirm against the pywikibot Page implementation.
    _cache_attrs = tuple(Page._cache_attrs) + ('_ref_table',)

    def __init__(self, source, title='', ns=0):
        """
        Initializer.

        All parameters are passed through unchanged to Page.__init__().

        @param source: first positional argument of Page.__init__()
        @param title: second positional argument of Page.__init__()
        @type title: str
        @param ns: third positional argument of Page.__init__()
        """
        super(PageWithRefs, self).__init__(source, title, ns)

    def get_ref_table(self, *args, **kwargs):
        """
        Build mapping table with pages which link the current page.

        All positional and keyword arguments are forwarded to
        self.getReferences().

        @return: referring pages grouped by the result of their
            namespace() call
        @rtype: collections.defaultdict of list
        """
        ref_table = collections.defaultdict(list)
        for page in self.getReferences(*args, **kwargs):
            ref_table[page.namespace()].append(page)
        return ref_table

    @property
    def ref_table(self):
        """
        Build link reference table lazily.

        This property gives a default table without any parameter set for
        getReferences(), whereas self.get_ref_table() is able to accept
        parameters.

        The table is built on first access and stored in self._ref_table.
        """
        if not hasattr(self, '_ref_table'):
            self._ref_table = self.get_ref_table()
        return self._ref_table

    def namespaces_with_ref_to_page(self, namespaces=None):
        """
        Check if current page has links from pages in namespaces.

        If namespaces is None, all namespaces are checked.

        Returns a set with namespaces where a ref to page is present.

        @param namespaces: Namespace to check
        @type namespaces: iterable of Namespace objects
        @return: namespaces where a ref to page is present
        @rtype: set
        """
        if namespaces is None:
            namespaces = self.site.namespaces()

        # Keys of ref_table are the namespaces that hold a referring page.
        return set(namespaces) & set(self.ref_table)
class DeletionRobot(MultipleSitesBot, CurrentPageBot):

    """This robot allows deletion of pages en masse."""

    def __init__(self, generator, summary, **kwargs):
        """
        Initializer.

        @param generator: the pages to work on
        @type generator: iterable
        @param summary: the reason for the (un)deletion
        @type summary: str
        @param kwargs: bot options, passed on to the parent initializer
        """
        # Declare the script specific options before the parent
        # initializer gets to see the keyword arguments.
        self.availableOptions.update({
            'undelete': False,
            'isorphan': 0,
            'orphansonly': [],
        })
        super(DeletionRobot, self).__init__(generator=generator, **kwargs)

        self.summary = summary
        # NOTE: the upcast of pages to PageWithRefs() is disabled, so the
        # pages reaching treat_page() are whatever the generator yields.
        # self.generator = (PageWithRefs(p) for p in self.generator)

    def display_references(self):
        """
        Display pages that link to the current page, sorted per namespace.

        Every namespace is printed on a numbered row. Number of pages to
        display per namespace is provided by:
         - self.opt.isorphan

        The current page has to provide a ref_table attribute, as
        PageWithRefs does; nothing is printed if that table is empty.
        """
        refs = self.current_page.ref_table
        if not refs:
            return

        total = sum(len(v) for v in refs.values())
        if total > 1:
            pywikibot.warning('There are {} pages that link to {}.'
                              .format(total, self.current_page))
        else:
            pywikibot.warning('There is a page that links to {}.'
                              .format(self.current_page))

        show_n_pages = self.opt.isorphan
        # Column width is given by the longest namespace prefix.
        width = len(max((ns.canonical_prefix() for ns in refs), key=len))

        for pagecount, ns in enumerate(sorted(refs), start=1):
            n_pages_in_ns = len(refs[ns])
            plural = '' if n_pages_in_ns == 1 else 's'
            ns_name = ns.canonical_prefix() if ns != ns.MAIN else 'Main:'
            ns_id = '[{0}]'.format(ns.id)
            # '{pc}' is the running row number; it used to be written as
            # the literal text '[pc]' although pc was passed to format().
            pywikibot.output(
                '[{pc}] {0!s:<{width}} {1:>6} {2:>10} page{pl}'.format(
                    ns_name, ns_id, n_pages_in_ns, width=width, pl=plural,
                    pc=pagecount))
            if show_n_pages:  # do not show marker if 0 pages are requested.
                for page in islice_with_ellipsis(refs[ns], show_n_pages):
                    pywikibot.output(' {0!s}'.format(page.title()))

    def skip_page(self, page):
        """
        Skip the page under some conditions.

        A page is skipped when it already exists in undelete mode, or when
        it does not exist in delete mode.

        @param page: the page that is about to be treated
        @return: True if the page has to be skipped, otherwise the result
            of the parent class check
        """
        if self.opt.undelete and page.exists():
            pywikibot.output('Skipping: {0} already exists.'.format(page))
            return True
        if not self.opt.undelete and not page.exists():
            pywikibot.output('Skipping: {0} does not exist.'.format(page))
            return True
        return super(DeletionRobot, self).skip_page(page)

    def treat_page(self):
        """
        Process one page from the generator.

        With the undelete option set the page is undeleted. Otherwise the
        page is deleted, but only if its text is shorter than 4 characters
        or contains 'ek' (compared case-insensitively).
        """
        page = self.current_page

        if self.opt.undelete:
            # Without this branch the undelete option fell through to the
            # deletion code below and tried to delete a missing page.
            page.undelete(self.summary)
            return

        text = page.text
        if len(text) >= 4 and 'ek' not in text.lower():
            return  # the page does not match the deletion criteria

        if self.site.user() is None:
            self.site.login()
        # NOTE(review): the two positional flags are presumably 'prompt'
        # and 'mark' -- confirm against the Page.delete() signature.
        page.delete(self.summary,
                    not self.opt.always,
                    self.opt.always,
                    quit=True)
def main(*args):
    """
    Parse the command line and run the deletion bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: str
    """
    options = {}
    summary = None
    page_name = ''

    # Whatever handle_args() hands back is interpreted by this script.
    remaining = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    site = pywikibot.Site()

    # The list is walked directly, not through a copy: the '-images' branch
    # appends a replacement argument that must be picked up by this loop.
    for arg in remaining:
        if arg == '-always':
            options['always'] = True
        elif arg.startswith('-summary'):
            if len(arg) > len('-summary'):
                summary = arg[len('-summary:'):]
            else:
                summary = pywikibot.input('Enter a reason for the deletion:')
        elif arg.startswith('-images'):
            warn('-image option is deprecated. Please use -imageused instead.',
                 exceptions.ArgumentDeprecationWarning)
            remaining.append('-imageused' + arg[len('-images'):])
        elif arg.startswith('-undelete'):
            options['undelete'] = True
        elif arg.startswith('-isorphan'):
            value = arg[len('-isorphan:'):]
            count = int(value) if value else 0
            # A negative count switches the option off.
            options['isorphan'] = False if count < 0 else count
        elif arg.startswith('-orphansonly'):
            names = arg[len('-orphansonly:'):]
            if names:
                options['orphansonly'] = site.namespaces.resolve(
                    names.split(','))
            else:
                options['orphansonly'] = site.namespaces
        else:
            factory.handle_arg(arg)
            _, colon, value = arg.partition(':')
            if colon:
                page_name = value

        # Derive a default summary from the current argument, unless a
        # summary is already known or no page name has been seen so far.
        if summary or not page_name:
            continue

        un = 'un' if 'undelete' in options else ''
        params = {'page': page_name}
        if arg.startswith(('-cat', '-subcats')):
            summary = i18n.twtranslate(site, 'delete-from-category', params)
        elif arg.startswith('-links'):
            summary = i18n.twtranslate(site, un + 'delete-linked-pages',
                                       params)
        elif arg.startswith('-ref'):
            summary = i18n.twtranslate(site, 'delete-referring-pages',
                                       params)
        elif arg.startswith('-imageused'):
            summary = i18n.twtranslate(site, un + 'delete-images', params)
        elif arg.startswith('-file'):
            summary = i18n.twtranslate(site, un + 'delete-from-file')

    # The combined generator is used as is; no preloading wrapper is added.
    generator = factory.getCombinedGenerator()
    if not generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return

    if summary is None:
        prefix = 'un' if options.get('undelete', False) else ''
        summary = pywikibot.input(
            'Enter a reason for the {}deletion:'.format(prefix))

    bot = DeletionRobot(generator, summary, **options)
    bot.run()
# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()