#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Merge data script for NEST.
# Collects all the JSON data generated by each individual MPI process
# of a single simulation in a given directory.
# Used by benchmark.sh after each simulation run.
# Execute with:
#   python3 merge_data.py PATH [--out=FILE] [--cleanup]
# where PATH is the path to the directory containing the per-rank results
# of a single simulation, FILE is an optional path for the output file
# (defaults to "merge.json"), and --cleanup is an optional flag that
# triggers the deletion of the files used in the merge (defaults to False).
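# Example invocation (hypothetical paths, assuming benchmark.sh wrote the
# per-rank JSON files to results/run_0):
#   python3 merge_data.py results/run_0 --out=results/run_0/merge.json --cleanup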
import json
from pathlib import Path
from argparse import ArgumentParser


def get_paths() -> tuple:
"""
    Parses the arguments given on the command line when launching the script.
    Required:
    - path (string): the path to the directory containing the per-rank results of a single simulation
    Optional:
    - --out (string): optional path for the output file. Defaults to "merge.json".
    - --cleanup (bool flag): optional flag to trigger the deletion of the files used in the merge. Defaults to False.
Returns:
- Tuple of path (Path), output file (Path), cleanup flag (bool)
"""
parser = ArgumentParser()
parser.add_argument("path", type=str)
parser.add_argument("--out", type=str, default="merge.json")
parser.add_argument("--cleanup", default=False, action="store_true")
args = parser.parse_args()
p = Path(args.path)
o = Path(args.out)
assert p.is_dir() and (not o.exists() or o.is_file())
if o.is_file():
print(f"WARNING: overriding {o}")
return p, o, args.cleanup


def get_json_results(path: Path):
"""
    Loads the content of every JSON file found at depth 0 of the given directory.
    Builds a JSON-like dictionary with the same structure as the input JSONs,
    storing each input JSON as a key-value pair where the key is the MPI rank
    and the value is the content of the file.
    Also creates an "all_values" key that aggregates the collected values
    (stats are accumulated across ranks, timers are collected into per-rank lists).
    Arguments:
    - path (Path): path to the directory containing the per-rank results of a single simulation
    Returns:
    - results (dict): all the collected data as a dictionary.
"""
results = {
"conf": {},
"ranks": {},
"all_values": {
"stats": {},
"timers": {}
}
}
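    # Tracks how many rank files reported each stat, for the sanity checks below.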
stat_count = {}
for p in path.glob("*.json"):
with p.open() as f:
data = json.load(f)
rank = data["rank"]
conf = data["conf"]
stats = data["stats"]
timers = data["timers"]
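        # The run configuration must be identical across ranks: store it from
        # the first file, then assert that every later file matches it.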
if results["conf"] == {}:
results["conf"] = conf
else:
for param in conf:
assert results["conf"][param] == conf[param]
assert rank not in results["ranks"]
results["ranks"][rank] = {
"stats": stats,
"timers": timers
}
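        # Aggregate across ranks: stats are accumulated with +=, while timers
        # are appended to lists holding one entry per MPI process.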
if results["all_values"]["stats"] == {}:
for stat in stats:
stat_count[stat] = 1
results["all_values"]["stats"][stat] = stats[stat]
else:
for stat in stats:
stat_count[stat] += 1
results["all_values"]["stats"][stat] += stats[stat]
if results["all_values"]["timers"] == {}:
for timer in timers:
results["all_values"]["timers"][timer] = [timers[timer]]
else:
for timer in timers:
results["all_values"]["timers"][timer].append(timers[timer])
total_procs = results["conf"]["procs"]
assert len(results["ranks"]) == total_procs
for stat in results["all_values"]["stats"]:
assert stat_count[stat] == total_procs
for timer in results["all_values"]["timers"]:
assert len(results["all_values"]["timers"][timer]) == total_procs
return results


def cleanup(path: Path) -> None:
"""
    Given a path to a directory, deletes all files with a ".json" extension at depth 0.
Arguments:
- path (Path): Path object to directory.
"""
for p in path.glob("*.json"):
p.unlink()


def save_data(results: dict, out: Path) -> None:
"""
    Given a JSON-like dictionary of results and a path to a file,
    saves the data as JSON to that file.
Arguments:
- results (dict): all the collected data as a dictionary.
- out (Path): Path object to output file.
"""
with out.open("w") as f:
json.dump(results, f, indent=4)


def main():
"""
Main function of script.
"""
path, out, kup = get_paths()
res = get_json_results(path)
if kup:
cleanup(path)
save_data(res, out)


if __name__ == "__main__":
main()