-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathemail_textutils.py
110 lines (91 loc) · 3.23 KB
/
email_textutils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env python3
"""Clean up text emails by stripping block-quoted lines and other garbage"""
import sys
from typing import List
import re
# Sample message body used by the __main__ demo below. It mixes plain
# paragraphs, ">"-prefixed quoted lines (single lines and longer runs),
# lines starting with "<" (an HTML tag and "<>" bullets) and an
# "On ... wrote:" reply header.
testdata = """LOREM IPSUM
> Lorem ipsum dolor sit amet, consectetur adipiscing elit
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed
do eiusmod tempor incididunt ut labore et dolore magna
aliqua. Ut enim ad minim veniam, quis nostrud exercitation
ullamco laboris nisi ut aliquip ex ea commodo consequat.
>> Duis aute irure dolor in reprehenderit in voluptate velit
>> esse cillum dolore eu fugiat nulla pariatur. Excepteur sint
> occaecat cupidatat non proident, sunt in culpa qui officia
> deserunt mollit anim id est laborum.
Duis aute irure dolor in reprehenderit in voluptate velit
esse cillum dolore eu fugiat nulla pariatur. Excepteur sint
occaecat cupidatat non proident, sunt in culpa qui officia
deserunt mollit anim id est laborum.
<p>Duis aute irure dolor in reprehenderit</p>
* Duis aute irure dolor in reprehenderit
* Sunt in culpa qui officia
<> Cupidatat
<> Occaecat
* Ad minim veniam
On Quartidi Ventôse, Ullamco Laboris wrote:
> Voluptate Velit
> Ullamco Laboris
> Irure Dolor
"""
def strip_quoteblocks(instr: str, blocksize: int) -> str:
    """Remove every run of at least ``blocksize`` consecutive quoted lines.

    Each line of ``instr`` is classified with ``is_quoted_line``. A run of
    adjacent quoted lines is dropped only when it contains ``blocksize`` or
    more lines; shorter runs are kept untouched.

    Args:
        instr: The message text to clean.
        blocksize: Minimum length of a quoted run for it to be removed.

    Returns:
        The remaining lines joined with newlines, with leading and trailing
        whitespace stripped from the whole result.
    """
    lines: List[str] = instr.splitlines()
    doomed = set()          # indices of lines that belong to a long-enough run
    run: List[int] = []     # indices of the quoted run currently being scanned

    for index, line in enumerate(lines):
        if is_quoted_line(line):
            run.append(index)
            continue
        # A non-quoted line ends the current run: commit it if long enough.
        if len(run) >= blocksize:
            doomed.update(run)
        run = []

    # The text may end while still inside a quoted run.
    if len(run) >= blocksize:
        doomed.update(run)

    kept = [line for index, line in enumerate(lines) if index not in doomed]
    return '\n'.join(kept).strip()
def strip_trailing_quoteblocks(instr: str, blocksize: int) -> str:
    """Remove the final run of quoted lines if it has at least ``blocksize`` lines.

    The input is stripped of surrounding whitespace first, so blank lines at
    the very end do not hide a trailing quote block. Only the run of quoted
    lines (as judged by ``is_quoted_line``) that ends the text is considered;
    quoted lines elsewhere are never removed.

    Args:
        instr: The message text to clean.
        blocksize: Minimum length of the trailing quoted run for it to be
            removed.

    Returns:
        The remaining lines joined with newlines, with leading and trailing
        whitespace stripped from the whole result.
    """
    lines: List[str] = instr.strip().splitlines()

    # Walk backwards to find where the trailing quoted run starts.
    keep = len(lines)
    while keep > 0 and is_quoted_line(lines[keep - 1]):
        keep -= 1

    if len(lines) - keep >= blocksize:
        lines = lines[:keep]

    return '\n'.join(lines).strip()
def is_quoted_line(line: str) -> bool:
    """Return True if a plaintext line looks like quoted text.

    A line counts as quoted when a ``>`` appears within its first three
    characters and no ``<`` does. The ``<`` exclusion keeps short HTML tags
    such as ``<p>`` and markers such as ``<>`` from being treated as quotes,
    but this is only a heuristic and is not safe for HTML content in general.

    Note that any ``>`` in the first three characters qualifies, so a line
    such as ``a > b`` is also reported as quoted.
    """
    prefix = line[:3]
    return '>' in prefix and '<' not in prefix
def strip_quoteheader(instr: str) -> str:
    """Remove reply-attribution lines of the form ``On ... wrote:``.

    A line is dropped when it begins with ``On `` and contains ``wrote:``
    later on the same line; any text after ``wrote:`` does not prevent the
    match. Lines where ``On`` is not at the very start are kept.

    Args:
        instr: The message text to clean.

    Returns:
        The remaining lines joined with newlines, with leading and trailing
        whitespace stripped from the whole result.
    """
    # Compiled once per call rather than looked up for every line.
    header = re.compile(r"On .*wrote:")
    kept = [line for line in instr.splitlines() if not header.match(line)]
    return '\n'.join(kept).strip()
if __name__ == "__main__":
    # Demo: drop quoted runs of three or more lines from the sample text,
    # then remove any "On ... wrote:" header lines, and print the result.
    without_blocks = strip_quoteblocks(testdata, 3)
    cleaned = strip_quoteheader(without_blocks)
    print(cleaned)
    sys.exit()