-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathparse_cmep.py
101 lines (82 loc) · 3.66 KB
/
parse_cmep.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/bin/python3
'''
note: this uses Python 3
This will parse a delimited CMEP file into single row CSV records.
Usage : python3 parse_cmep.py -f CMEP_Format_sample.txt -d '\t' -r 14 --header
Output columns : [Meter ID], [Timestamp], [Unit of measurement], [Constant Calculation], [Quality Code], [Value]
Output filename : parsed_cmep_[commodity id]_[unit of measurement].[process id].csv
Author : Mike Czabator
example :
mike@lenovo ~/mike/tools/cmep_file_parser
$ python3 parse_cmep.py --delimiter '\t' --file CMEP_Format_sample.txt --header
***************
Input file : CMEP_Format_sample.txt
input CMEP file lines: 10000
output CSV lines : 96755
time : 2.2290380001068115 seconds / 0 minutes
***************
mike@lenovo ~/mike/tools/cmep_file_parser
$ head parsed_cmep_E_KWH.27772.csv
123456,201811020700,KWH,1,,1.23
123456,201811020800,KWH,1,,1.173
123456,201811020900,KWH,1,,1.173
123456,201811021000,KWH,1,,1.173
123456,201811021100,KWH,1,,1.173
123456,201811021200,KWH,1,,1.23
123456,201811021300,KWH,1,,1.23
123456,201811021400,KWH,1,,1.288
123456,201811021500,KWH,1,,1.23
123456,201811021600,KWH,1,,1.23
'''
import argparse
import os
import codecs
import time
def unescaped_str(arg_str):
    """Interpret backslash escape sequences in a command-line value.

    The argument is converted with ``str()`` and run through the
    ``unicode_escape`` codec, so a backslash followed by ``t`` typed on the
    command line becomes a real tab character. Used as the argparse ``type``
    for the delimiter option.
    """
    decode_escapes = codecs.getdecoder('unicode_escape')
    # The decoder returns (decoded text, amount of input consumed).
    decoded_text, _consumed = decode_escapes(str(arg_str))
    return decoded_text
# Command-line interface. Every option carries a default, so none is required.
parser = argparse.ArgumentParser()
parser.add_argument(
    "-f", "--file",
    type=str,
    default='CMEP_Format_sample.txt',
    required=False,
    help="filename of CMEP file (ex: /tmp/CMEP_Format_sample.txt)",
)
parser.add_argument(
    "-d", "--delimiter",
    type=unescaped_str,  # turns an escaped sequence such as backslash-t into the real character
    default='\t',
    required=False,
    help="Delimiter used in CMEP file, in quotes ( ex: ',' , '|', '\\t' )",
)
parser.add_argument(
    "-r", "--read_start_column",
    type=int,
    default=14,
    required=False,
    help="Column which interval reads start in CMEP file (default: 14)",
)
# --header and --no-header share one destination; the header line is skipped
# unless --no-header is given.
parser.add_argument(
    '--header',
    action='store_true',
    dest='skip_header',
    help="File contains a header (default)",
)
parser.add_argument(
    '--no-header',
    action='store_false',
    dest='skip_header',
    help="File does NOT contain a header",
)
parser.set_defaults(skip_header=True)
args = parser.parse_args()
def parse_file(options=None):
    """Flatten a delimited CMEP file into one CSV row per interval read.

    Each data line is split on the delimiter. Starting at
    ``read_start_column`` the fields are consumed in groups of three
    (timestamp, quality code, value) and every group with a non-empty
    timestamp is written as
    ``meter id,timestamp,unit,constant,quality code,value``.
    Rows are appended to
    ``parsed_cmep_<field 9>_<field 10>.<process id>.csv`` in the current
    working directory. A summary is printed when the input is exhausted.

    Args:
        options: optional object exposing ``file``, ``delimiter``,
            ``read_start_column`` and ``skip_header`` attributes (for
            example an ``argparse.Namespace``). When omitted, the
            module-level ``args`` parsed from the command line is used.

    Raises:
        IndexError: if a data line has too few fields to build the output
            filename (fields 9 and 10 are required).
        OSError: if the input cannot be read or an output cannot be written.
    """
    opts = args if options is None else options
    start_time = time.time()
    line_number = 0
    record_count = 0
    pid_suffix = "." + str(os.getpid()) + ".csv"
    # Output filename -> open handle. Each output file is opened once and
    # reused, instead of being re-opened for every input line.
    outfiles = {}
    try:
        with open(opts.file, "r") as f:
            for line in f:
                # skip the first line when the file has a header (--header, the default)
                if line_number == 0 and opts.skip_header:
                    line_number += 1
                    continue
                # skip blank and whitespace-only lines (not counted as input lines)
                if not line.strip():
                    continue
                # NOTE: rstrip() removes all trailing whitespace, which includes
                # trailing delimiters when the delimiter itself is whitespace.
                record = line.rstrip().split(opts.delimiter)
                line_number += 1

                # parsed_cmep_[commodity id]_[units].[process id].csv
                out_name = "parsed_cmep_" + record[9] + "_" + record[10] + pid_suffix
                outfile = outfiles.get(out_name)
                if outfile is None:
                    outfile = open(out_name, "a")
                    outfiles[out_name] = outfile

                column = opts.read_start_column
                while column < len(record):
                    if record[column] == "":
                        # empty timestamp: skip this (timestamp, quality, value) group
                        column += 3
                        continue
                    try:
                        # METER ID, Timestamp, Unit of measurement,
                        # Constant Calculation, Quality code, Value
                        fields = (
                            record[7],
                            record[column],
                            record[10],
                            record[11],
                            record[column + 1],
                            record[column + 2],
                        )
                    except IndexError:
                        # incomplete trailing group: nothing more to read on this line
                        break
                    # Written outside the try so I/O errors are not swallowed.
                    outfile.write(",".join(fields) + "\n")
                    record_count += 1
                    column += 3
    finally:
        # Close every output file, even if parsing stopped on an error.
        for handle in outfiles.values():
            handle.close()

    elapsed = time.time() - start_time
    print(
        "\n***************\nInput file : {0}\ninput CMEP file lines: {1}\n"
        "output CSV lines : {2}\ntime : {3} seconds / {4} minutes \n"
        "***************\n".format(
            opts.file, line_number, record_count, elapsed, elapsed / 60
        )
    )
# Script entry point: run the parser when this file is executed directly.
if __name__ == "__main__":
    parse_file()