forked from wireservice/csvkit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
csvgrep
executable file
·59 lines (44 loc) · 2.49 KB
/
csvgrep
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python
import re
import sys
from csvkit import CSVKitReader, CSVKitWriter
from csvkit.cli import CSVKitUtility, CSVFileType, parse_column_identifiers, print_column_names
from csvkit.grep import FilteringCSVReader
class CSVGrep(CSVKitUtility):
    """Command-line utility that filters the rows of a CSV file by a search pattern.

    The header row is always echoed to the output; data rows are written only
    when the filter built from PATTERN selects them.
    """

    description = 'Search CSV files. Like the unix "grep" command, but for tabular data.'
    # NOTE(review): presumably tells the base class not to register its own
    # 'f' (input file) argument, because add_arguments() declares FILE itself
    # after PATTERN -- confirm against CSVKitUtility.
    override_flags = 'f'

    def add_arguments(self):
        """Register csvgrep's options and positional arguments on ``self.argparser``."""
        self.argparser.add_argument('-n', '--names', dest='names_only', action='store_true',
            help='Display column names and indices from the input CSV and exit.')
        self.argparser.add_argument('-c', '--columns', dest='columns',
            help='A comma separated list of column indices or names to be searched.')
        self.argparser.add_argument('-r', '--regex', dest='regex', action='store_true',
            help='If specified, the search pattern will be treated as a Python regular expression.')
        self.argparser.add_argument('-i', '--invert-match', dest='inverse', action='store_true',
            help='If specified, select non-matching instead of matching rows.')
        # Both positionals are optional so that "-n" can be used without a pattern.
        self.argparser.add_argument('pattern', metavar="PATTERN", nargs='?',
            help='The pattern to search for.')
        self.argparser.add_argument('file', metavar="FILE", nargs='?', type=CSVFileType(), default=sys.stdin,
            help='The CSV file to operate on. If omitted, will accept input on STDIN.')

    def main(self):
        """Run the search and write the header plus the selected rows to STDOUT.

        Exits early (via ``sys.exit``) when ``-n`` is given, when no pattern
        was supplied, or when ``-r`` is given with a pattern that is not a
        valid regular expression. Returns without writing anything when the
        input contains no header row.
        """
        if self.args.names_only:
            print_column_names(self.args.file, sys.stdout, **self.reader_kwargs)
            sys.exit()

        if not self.args.pattern:
            sys.exit('A pattern must be specified unless using the -n option.')

        rows = CSVKitReader(self.args.file, **self.reader_kwargs)

        # The builtin next() works on both Python 2 and 3 iterators, unlike
        # the Python 2-only ``rows.next()`` method call.
        try:
            column_names = next(rows)
        except StopIteration:
            # Empty input: there is no header to echo and nothing to search.
            return

        column_ids = parse_column_identifiers(self.args.columns, column_names)

        if self.args.regex:
            try:
                pattern = re.compile(self.args.pattern)
            except re.error as e:
                # Report a bad pattern as a usage error rather than a traceback.
                sys.exit('Invalid regular expression "%s": %s' % (self.args.pattern, e))
        else:
            pattern = self.args.pattern

        # The same pattern is applied to every selected column.
        patterns = dict((c, pattern) for c in column_ids)

        output = CSVKitWriter(sys.stdout, **self.writer_kwargs)
        output.writerow(column_names)

        # The header has already been consumed above, hence header=False.
        filter_reader = FilteringCSVReader(rows, header=False, patterns=patterns, inverse=self.args.inverse)

        for row in filter_reader:
            output.writerow(row)
if __name__ == "__main__":
    # Executed as a script: build the utility and run it in one step.
    CSVGrep().main()