forked from alimanfoo/csvvalidator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
example.py
executable file
·129 lines (105 loc) · 4.56 KB
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/usr/bin/env python
"""
An executable Python script illustrating the use of the CSVValidator module.
This script illustrates some, but not all, of the features available. For a
complete account of all features available, see the tests.py module.
"""
import argparse
import os
import sys
import csv
from csvvalidator import CSVValidator, enumeration, number_range_inclusive,\
write_problems, datetime_string, RecordError
def create_validator():
    """Create an example CSV validator for patient demographic data.

    The validator is built for six fields (``study_id``, ``patient_id``,
    ``gender``, ``age_years``, ``age_months``, ``date_inclusion``) and has
    the following checks registered, identified by problem code:

    * ``EX1`` -- header check
    * ``EX2`` -- record length check
    * ``EX3`` / ``EX4`` -- ``study_id`` / ``patient_id`` convert with ``int``
    * ``EX5`` -- ``gender`` is one of ``'M'`` or ``'F'``
    * ``EX6`` -- ``age_years`` is an integer in the inclusive range 0..120
    * ``EX7`` -- ``date_inclusion`` matches the format ``%Y-%m-%d``
    * ``EX8`` -- ``age_months`` is consistent with ``age_years``

    Returns:
        The configured ``CSVValidator`` instance.
    """
    field_names = (
        'study_id',
        'patient_id',
        'gender',
        'age_years',
        'age_months',
        'date_inclusion',
    )
    validator = CSVValidator(field_names)

    # basic header and record length checks
    validator.add_header_check('EX1', 'bad header')
    validator.add_record_length_check('EX2', 'unexpected record length')

    # some simple value checks
    validator.add_value_check('study_id', int,
                              'EX3', 'study id must be an integer')
    validator.add_value_check('patient_id', int,
                              'EX4', 'patient id must be an integer')
    validator.add_value_check('gender', enumeration('M', 'F'),
                              'EX5', 'invalid gender')
    validator.add_value_check('age_years', number_range_inclusive(0, 120, int),
                              'EX6', 'invalid age in years')
    validator.add_value_check('date_inclusion', datetime_string('%Y-%m-%d'),
                              'EX7', 'invalid date')

    # a more complicated record check
    def check_age_variables(r):
        """Raise ``RecordError`` EX8 unless the two age fields agree.

        ``r`` is indexed by field name.  Both age fields are converted with
        ``int``; a value that does not convert raises ``ValueError`` here
        rather than ``RecordError``.
        """
        age_years = int(r['age_years'])
        age_months = int(r['age_months'])
        # The age in months must lie inside the year given by age_years:
        # at least age_years * 12 and fewer than 12 months beyond that.
        # A plain range test is used (not a modulo by age_years) so that
        # age_years == 0, which the EX6 check allows, cannot divide by zero.
        first_month = age_years * 12
        if not first_month <= age_months < first_month + 12:
            raise RecordError('EX8', 'invalid age variables')

    validator.add_record_check(check_age_variables)

    return validator
def main():
    """Validate the file named on the command line and report problems.

    Parses the command-line arguments, reads the file as tab-delimited
    data, runs the validator from ``create_validator()`` over it and writes
    the problems found to stdout.

    The process always ends through ``sys.exit``: status 1 when the given
    path is not a file or when any problems were found, status 0 otherwise.
    """
    # define a command-line argument parser
    description = 'Validate a CSV data file.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        'file',
        metavar='FILE',
        help='a file to be validated',
    )
    parser.add_argument(
        '-l', '--limit',
        dest='limit',
        type=int,
        action='store',
        default=0,
        help='limit the number of problems reported',
    )
    parser.add_argument(
        '-s', '--summarize',
        dest='summarize',
        action='store_true',
        default=False,
        help='output only a summary of the different types of problem found',
    )
    parser.add_argument(
        '-e', '--report-unexpected-exceptions',
        dest='report_unexpected_exceptions',
        action='store_true',
        default=False,
        help='report any unexpected exceptions as problems',
    )

    # parse arguments
    args = parser.parse_args()

    # sanity check arguments
    if not os.path.isfile(args.file):
        # Parenthesised single-argument form: prints the same text whether
        # this runs as a Python 2 print statement or a Python 3 function.
        print('%s is not a file' % args.file)
        sys.exit(1)

    with open(args.file, 'r') as f:

        # set up a csv reader for the data (fields are tab-separated)
        data = csv.reader(f, delimiter='\t')

        # create a validator
        validator = create_validator()

        # validate the data from the csv reader
        # N.B., validate() returns a list of problems;
        # if you expect a large number of problems, use ivalidate() instead
        # of validate(), but bear in mind that ivalidate() returns an iterator
        # so there is no len()
        problems = validator.validate(
            data,
            summarize=args.summarize,
            report_unexpected_exceptions=args.report_unexpected_exceptions,
            context={'file': args.file},
        )

        # write problems to stdout as restructured text
        write_problems(
            problems,
            sys.stdout,
            summarize=args.summarize,
            limit=args.limit,
        )

        # decide how to exit: non-zero when at least one problem was found
        # (will not work with ivalidate() because it returns an iterator)
        sys.exit(1 if problems else 0)
# Run the validation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()