analyse_fi_apprx.py
import json
import os
import csv
import glob
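
# Post-processing helpers for frequent-items / HLL experiment output.
# Each manifest CSV (e.g. WTFI_APPRX.csv) is expected to contain at least the
# columns 'runtime', 'resultPath' and, for the approximate variant, 'mapSize';
# 'resultPath' points at a directory of per-thread JSON-lines result files.
# (These column expectations are inferred from the parsing code below.)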


def file_is_empty(path):
    return os.stat(path).st_size == 0


def process_file_fi(path, name):
    """Flatten approximate frequent-items results into a single CSV."""
    fieldnames = ["experimentNum", "runtime", "thread", "emitNum", "estimate", "item", "lowerBound", "upperBound",
                  "mapSize"]
    with open(name, 'w', newline='') as csv_write_file:
        print("Opened file")
        with open(path, newline='') as csvfile:
            reader = csv.DictReader(csvfile)
            writer = csv.DictWriter(csv_write_file, fieldnames=fieldnames)
            writer.writeheader()
            for row in reader:
                # Experiment number: data row index, excluding the header line.
                print(reader.line_num - 1)
                print(row['resultPath'])
                # Each experiment row points at a directory of per-thread result files.
                result_files = glob.glob(row['resultPath'] + '/*')
                for thread, file in enumerate(result_files, 1):
                    if not file_is_empty(file):
                        print("process file " + file)
                        with open(file) as result_file:
                            # Each non-empty line is one JSON object holding a result list.
                            for line_number, line in enumerate(result_file, 1):
                                if len(line.strip()) != 0:
                                    parsed_line = json.loads(line)
                                    if 'resultList' in parsed_line:
                                        for result_item in parsed_line['resultList']:
                                            writer.writerow(
                                                {'experimentNum': reader.line_num - 1, 'runtime': row['runtime'],
                                                 'thread': thread, 'emitNum': line_number,
                                                 'item': result_item['item'],
                                                 'estimate': result_item['estimate'],
                                                 'lowerBound': result_item['lowerBound'],
                                                 'upperBound': result_item['upperBound'],
                                                 'mapSize': row['mapSize']})


def process_file_fi_exact(path, name):
    """Flatten exact frequent-items results into a single CSV."""
    fieldnames = ["experimentNum", "runtime", "thread", "emitNum", "estimate", "item"]
    with open(name, 'w', newline='') as csv_write_file:
        print("Opened file")
        with open(path, newline='') as csvfile:
            reader = csv.DictReader(csvfile)
            writer = csv.DictWriter(csv_write_file, fieldnames=fieldnames)
            writer.writeheader()
            for row in reader:
                print(reader.line_num - 1)
                print(row['resultPath'])
                result_files = glob.glob(row['resultPath'] + '/*')
                for thread, file in enumerate(result_files, 1):
                    if not file_is_empty(file):
                        print("process file " + file)
                        with open(file) as result_file:
                            for line_number, line in enumerate(result_file, 1):
                                if len(line.strip()) != 0:
                                    parsed_line = json.loads(line)
                                    if 'frequentItems' in parsed_line:
                                        # 'frequentItems' maps each item to its exact count.
                                        for result_item in parsed_line['frequentItems']:
                                            writer.writerow(
                                                {'experimentNum': reader.line_num - 1, 'runtime': row['runtime'],
                                                 'thread': thread, 'emitNum': line_number,
                                                 'item': result_item,
                                                 'estimate': parsed_line['frequentItems'][result_item]})


def process_file_hll(path, name):
    """Flatten HLL results (item key plus estimate) into a single CSV."""
    fieldnames = ["experimentNum", "runtime", "thread", "emitNum", "estimate", "item"]
    with open(name, 'w', newline='') as csv_write_file:
        print("Opened file")
        with open(path, newline='') as csvfile:
            reader = csv.DictReader(csvfile)
            writer = csv.DictWriter(csv_write_file, fieldnames=fieldnames)
            writer.writeheader()
            for row in reader:
                print(reader.line_num - 1)
                print(row['resultPath'])
                result_files = glob.glob(row['resultPath'] + '/*')
                for thread, file in enumerate(result_files, 1):
                    if not file_is_empty(file):
                        print("process file " + file)
                        with open(file) as result_file:
                            for line_number, line in enumerate(result_file, 1):
                                if len(line.strip()) != 0:
                                    parsed_line = json.loads(line)
                                    if 'resultList' in parsed_line:
                                        for result_item in parsed_line['resultList']:
                                            writer.writerow(
                                                {'experimentNum': reader.line_num - 1, 'runtime': row['runtime'],
                                                 'thread': thread, 'emitNum': line_number,
                                                 'item': result_item['key'],
                                                 'estimate': result_item['estimate']})

# process_file_fi('AZFI_APPRX.csv', 'AZFI_APPRX_Rating_Processed.csv')
# process_file_fi('ILFI_APPRX.csv', 'ILFI_APPRX_Processed.csv')
# process_file_fi_exact('AZFI_EXACT.csv', 'AZFI_EXACT_Rating_Processed.csv')
# process_file_hll('HLL_APPRX.csv', 'HLL_APPRX_REVIEWER_PRODUCT_processed.csv')
process_file_fi('WTFI_APPRX.csv', 'WTFI_APPRX_URL_PROCESSED.csv')