-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnospam.py
50 lines (40 loc) · 1.21 KB
/
nospam.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env python
import os
import random
import re
import sys
def hide(html):
    """Hide a chunk of HTML text from dumb robots.

    Every character of ``html`` is replaced by an HTML numeric character
    reference. For each character the decimal form (e.g. ``&#64;``) or the
    hexadecimal form (e.g. ``&#x40;``) is picked at random, so the output
    differs between calls while decoding back to the same text.

    Args:
        html: The text to obfuscate.

    Returns:
        A string made of one numeric character reference per input
        character; the empty string when ``html`` is empty.
    """
    # One random draw per character: 1 selects the decimal form, 0 the
    # hexadecimal one.
    return ''.join(
        ('&#%d;' if random.randrange(2) else '&#x%x;') % ord(c)
        for c in html)
# Mail anchors, compatible with the ones latex2html emits: an optional
# class="ulink" attribute, a mailto: href, and optionally a line break
# before the closing ">".
# The href group stops at the first double quote and the anchor text is
# matched non-greedily, so that two mail anchors on the same line are
# handled as two separate matches instead of being merged into one.
_MAIL_ANCHOR_RE = re.compile(
    r'<a (class="ulink" )?href="(mailto:[^"\n]+)"(?:\n *)?>(.+?)</a>')


def filter(html):
    """Filter all dumb-robot-readable mail anchors from an HTML text.

    Each mail anchor found in ``html`` is rewritten so that both its
    ``mailto:`` target and its visible text are passed through ``hide()``.
    A ``class="ulink"`` attribute, when present, is kept as is. Text outside
    mail anchors is returned unchanged.

    Args:
        html: The HTML text to process.

    Returns:
        The HTML text with every matched mail anchor obfuscated.
    """
    def replace(m):
        # Group 1 is the optional class attribute (None when absent).
        return '<a %shref="%s">%s</a>' % (
            m.group(1) or '', hide(m.group(2)), hide(m.group(3)))

    return _MAIL_ANCHOR_RE.sub(replace, html)
# filter()
if __name__ == '__main__':
    # Obfuscate, in place, the mail anchors of every file named on the
    # command line. Files in which filter() changes nothing are left alone.
    for name in sys.argv[1:]:
        # Context managers make sure the handles are closed even when
        # reading or writing fails.
        with open(name, 'r') as source:
            html = source.read()
        text = filter(html)
        if text == html:
            continue
        # The original file is removed first, so the result is written to
        # a newly created file of the same name.
        os.remove(name)
        with open(name, 'w') as target:
            target.write(text)