forked from SeattleTestbed/seattlelib_v2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
textops.py
207 lines (147 loc) · 4.77 KB
/
textops.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
"""
<Program Name>
textops.py
<Started>
July 24, 2009
<Author>
Conrad Meyer <[email protected]>
<Purpose>
Provides text-processing utility functions loosely modelled after GNU
coreutils.
Currently supports a subset of the functionality of grep and wc.
"""
def textops_rawtexttolines(text, linedelimiter="\n"):
"""
<Purpose>
Converts raw text (a string) into lines that can be processed by the
functions in this module.
<Arguments>
text:
The text to convert into lines (basically, a sequence of strings).
linedelimiter (optional, defaults to "\n"):
The string denoting an EOL ("\n" on Unix, "\r\n" on Windows).
<Exceptions>
TypeError on bad parameters.
<Side Effects>
None.
<Returns>
A sequence of strings; each element is a line, with newlines removed.
"""
lines = text.split(linedelimiter)
if lines[len(lines)-1] == '':
lines = lines[:-1]
return lines
def textops_grep(match, lines, exclude=False, case_sensitive=True):
"""
<Purpose>
Return a subset of lines matching (or not matching) a given string.
<Arguments>
match:
The string to match.
lines:
The lines to filter.
exclude (optional, defaults to false):
If false, include lines matching 'match'. If true, include lines
not matching 'match'.
case_sensitive (optional, defaults to true):
If false, ignore case when comparing 'match' to the lines.
<Exceptions>
TypeError on bad parameters.
<Side Effects>
None.
<Returns>
A subset of the original lines.
"""
if not case_sensitive:
match = match.lower()
result = []
for line in lines:
if not case_sensitive:
mline = line.lower()
else:
mline = line
if not exclude and mline.find(match) >= 0:
result.append(line)
elif exclude and mline.find(match) < 0:
result.append(line)
return result
def textops_cut(lines, delimiter="\t", characters=None, fields=None,
complement=False, only_delimited=False, output_delimiter=None):
"""
<Purpose>
Perform the same operations as GNU coreutils' cut.
<Arguments>
lines:
The lines to perform a cut on.
delimiter (optional):
Field delimiter. Defaults to "\t" (tab).
characters (optional):
Characters selector. Used to select some subset of characters
in the lines. Should be a sequence argument. Caller must use one
of characters or fields; not both.
Note: unlike cut(1), we use zero-based indices.
fields (optional):
Fields selector. Used to select some subset of fields in the
lines. Should be a sequence argument. Caller must use one
of characters or fields; not both.
Note: unlike cut(1), we use zero-based indices.
complement (optional):
Invert which characters or fields get selected. Defaults to False.
only_delimited (optional):
When selecting fields, only include lines containing the field
delimiter.
output_delimiter (optional):
When selecting fields, join fields with this (defaults to the
input delimiter).
<Exceptions>
TypeError on bad parameters.
<Side Effects>
None.
<Returns>
The filtered lines.
"""
if (characters is None and fields is None) or \
(characters is not None and fields is not None):
raise TypeError("Exactly one of fields or characters must be specified.")
if output_delimiter is None:
output_delimiter = delimiter
if characters is None:
matched_fields = list(fields)
else:
matched_fields = list(characters)
res_list = []
for line in lines:
if characters is not None:
if complement:
select_characters = list(range(0, len(line)))
for character in matched_fields:
select_characters = select_characters.remove(character)
else:
select_characters = matched_fields
res_line = ""
for character in select_characters:
try:
res_line += line[character]
except IndexError:
break
res_list.append(res_line)
else:
split_line = line.split(delimiter)
if len(split_line) == 1:
if not only_delimited:
res_list.append(line)
continue
if complement:
select_fields = list(range(0, len(split_line)))
for field in matched_fields:
select_fields = select_fields.remove(field)
else:
select_fields = matched_fields
split_res_line = []
for field in select_fields:
try:
split_res_line.append(split_line[field])
except IndexError:
break
res_list.append(output_delimiter.join(split_res_line))
return res_list