-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpedantry.py
154 lines (107 loc) · 3.33 KB
/
pedantry.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# -*- coding: utf-8 -*-
"""
Search texts for violations of prescriptive grammar rules.
"""
import nltk
# A helper function used by all the checking functions.
def tokenize_tag(sentence):
    """Split a sentence into tokens and label each with a POS tag.

    Tokenization is done by nltk.word_tokenize and tagging by
    nltk.pos_tag, requesting the 'universal' tagset.

    Arguments:
        sentence: A string

    Returns:
        The result of nltk.pos_tag: a list of (token, tag) tuples.
    """
    return nltk.pos_tag(nltk.word_tokenize(sentence), tagset='universal')
# The checking functions.
def endswith_preposition(sentence):
    """Report whether the last word of a sentence is a preposition.

    Tokens tagged '.' (punctuation in the universal tagset) at the end of
    the sentence are skipped, so the decision is made on the last token
    carrying any other tag. A sentence with no such token gives False.

    Examples:
        >>> endswith_preposition('Who did you go with?')
        True
        >>> endswith_preposition('With whom did you go?')
        False

    Arguments:
        sentence: A string

    Returns:
        Boolean.
    """
    tagged = tokenize_tag(sentence)
    # Walk backwards and stop at the first tag that is not punctuation;
    # None stands in when the sentence holds nothing but punctuation.
    final_tag = next(
        (tag for _, tag in reversed(tagged) if tag != '.'),
        None,
    )
    return final_tag == 'ADP'
def split_infinitive(sentence):
    """Check whether a sentence contains a split infinitive.

    A split infinitive is detected as three consecutive tokens: the word
    'to' (compared case-insensitively), then a token tagged 'ADV', then a
    token tagged 'VERB'. Only a single adverb between 'to' and the verb
    is recognised.

    Examples:
        >>> split_infinitive('To boldly go.')
        True
        >>> split_infinitive('To go boldly.')
        False

    Arguments:
        sentence: A string

    Returns:
        Boolean.
    """
    tagged = tokenize_tag(sentence)
    # Slide a window of three tokens over the sentence; zip stops at the
    # shortest slice, so sentences with fewer than three tokens yield no
    # windows and the result is False.
    windows = zip(tagged, tagged[1:], tagged[2:])
    return any(
        word.lower() == 'to' and middle_tag == 'ADV' and last_tag == 'VERB'
        for (word, _), (_, middle_tag), (_, last_tag) in windows
    )
def startswith_conjunction(sentence):
    """Report whether the first word of a sentence is a conjunction.

    Tokens tagged '.' (punctuation in the universal tagset) at the start
    of the sentence are skipped, so the decision is made on the first
    token carrying any other tag. A sentence with no such token gives
    False.

    Examples:
        >>> startswith_conjunction('And it was all a dream.')
        True
        >>> startswith_conjunction('It was all a dream.')
        False

    Arguments:
        sentence: A string

    Returns:
        Boolean.
    """
    tagged = tokenize_tag(sentence)
    # Take the first tag that is not punctuation; None stands in when the
    # sentence holds nothing but punctuation.
    opening_tag = next(
        (tag for _, tag in tagged if tag != '.'),
        None,
    )
    return opening_tag == 'CONJ'
# An overall function that applies all the checking functions.
# Maps each human-readable violation label to the checking function that
# detects it. Every checker takes a sentence string and returns a boolean.
# check_text iterates over this dict, so labels are reported in the order
# listed here (dict insertion order, guaranteed from Python 3.7 on).
VIOLATION_LABELS = {
'final preposition': endswith_preposition,
'split infinitive': split_infinitive,
'initial conjunction': startswith_conjunction,
}
def check_text(text):
    """Find the sentences in a text that violate any of the three rules.

    The text is split into sentences with nltk.sent_tokenize and every
    checker in VIOLATION_LABELS is applied to each sentence. Sentences
    without violations are skipped. This is a generator, so wrap the call
    in list() to see all results at once.

    Example:
        >>> list(check_text('And who would you like to boldly go with?'))
        ... # doctest: +NORMALIZE_WHITESPACE
        [('And who would you like to boldly go with?',
          ['final preposition', 'split infinitive', 'initial conjunction'])]

    Arguments:
        text: A string

    Returns:
        Iterator of tuples of (sentence, [violations]).
    """
    for sentence in nltk.sent_tokenize(text):
        found = [
            label
            for label, checker in VIOLATION_LABELS.items()
            if checker(sentence)
        ]
        if not found:
            continue
        yield (sentence, found)
# A demo showing violations of the rules in an example dialogue.
if __name__ == '__main__':
    # Demo: run every check over a short dialogue and print each offending
    # sentence together with the labels of the rules it breaks.
    dialogue = """
Hello, have you boldly been anywhere lately?
Why would I want to boldly go anywhere?
You might have someone special to boldly go with.
But I don't.
I have boldly been somewhere, ask me about it.
Where have you boldly been then?
To space, the final frontier.
Did you boldly go with anyone?
No, but I'd like to.
And who would you like to boldly go with?
Captain Kirk, he's a total dreamboat.
"""
    for sentence, labels in check_text(dialogue):
        # The trailing '\n' argument leaves a blank line between results.
        print(sentence, labels, '\n')